author	Linus Torvalds <torvalds@linux-foundation.org>	2015-10-01 16:43:25 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-10-01 16:43:25 -0400
commit	ccf70ddcbe9984cee406be2bacfedd5e4776919d (patch)
tree	f9aaf50eab789d1cd8ce79a2e9e5804d86decfbc
parent	46c8217c4a54c17dd4c000ad804fa1e223a10578 (diff)
parent	d2922422c48df93f3edff7d872ee4f3191fefb08 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini:
 "(Relatively) a lot of reverts, mostly.

  Bugs have trickled in for a new feature in 4.2 (MTRR support in
  guests) so I'm reverting it all; let's not make this -rc period
  busier for KVM than it's been so far.

  This covers the four reverts from me.

  The fifth patch is being reverted because Radim found a bug in the
  implementation of stable scheduler clock, *but* also managed to
  implement the feature entirely without hypervisor support.  So
  instead of fixing the hypervisor side we can remove it completely;
  4.4 will get the new implementation"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  Use WARN_ON_ONCE for missing X86_FEATURE_NRIPS
  Update KVM homepage Url
  Revert "KVM: SVM: use NPT page attributes"
  Revert "KVM: svm: handle KVM_X86_QUIRK_CD_NW_CLEARED in svm_get_mt_mask"
  Revert "KVM: SVM: Sync g_pat with guest-written PAT value"
  Revert "KVM: x86: apply guest MTRR virtualization on host reserved pages"
  Revert "KVM: x86: zero kvmclock_offset when vcpu0 initializes kvmclock system MSR"
-rw-r--r--	MAINTAINERS	4
-rw-r--r--	arch/x86/include/asm/pvclock-abi.h	1
-rw-r--r--	arch/x86/kvm/svm.c	125
-rw-r--r--	arch/x86/kvm/vmx.c	11
-rw-r--r--	arch/x86/kvm/x86.c	4
5 files changed, 24 insertions(+), 121 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 9f6685f6c5a9..797236befd27 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5957,7 +5957,7 @@ F: virt/kvm/
 KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V
 M:	Joerg Roedel <joro@8bytes.org>
 L:	kvm@vger.kernel.org
-W:	http://kvm.qumranet.com
+W:	http://www.linux-kvm.org/
 S:	Maintained
 F:	arch/x86/include/asm/svm.h
 F:	arch/x86/kvm/svm.c
@@ -5965,7 +5965,7 @@ F: arch/x86/kvm/svm.c
 KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
 M:	Alexander Graf <agraf@suse.com>
 L:	kvm-ppc@vger.kernel.org
-W:	http://kvm.qumranet.com
+W:	http://www.linux-kvm.org/
 T:	git git://github.com/agraf/linux-2.6.git
 S:	Supported
 F:	arch/powerpc/include/asm/kvm*
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 655e07a48f6c..67f08230103a 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -41,6 +41,7 @@ struct pvclock_wall_clock {
 
 #define PVCLOCK_TSC_STABLE_BIT	(1 << 0)
 #define PVCLOCK_GUEST_STOPPED	(1 << 1)
+/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
 #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 94b7d15db3fc..2f9ed1ff0632 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -514,7 +514,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (svm->vmcb->control.next_rip != 0) {
-		WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS));
+		WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
 		svm->next_rip = svm->vmcb->control.next_rip;
 	}
 
@@ -866,64 +866,6 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
-#define MTRR_TYPE_UC_MINUS	7
-#define MTRR2PROTVAL_INVALID 0xff
-
-static u8 mtrr2protval[8];
-
-static u8 fallback_mtrr_type(int mtrr)
-{
-	/*
-	 * WT and WP aren't always available in the host PAT.  Treat
-	 * them as UC and UC- respectively.  Everything else should be
-	 * there.
-	 */
-	switch (mtrr)
-	{
-	case MTRR_TYPE_WRTHROUGH:
-		return MTRR_TYPE_UNCACHABLE;
-	case MTRR_TYPE_WRPROT:
-		return MTRR_TYPE_UC_MINUS;
-	default:
-		BUG();
-	}
-}
-
-static void build_mtrr2protval(void)
-{
-	int i;
-	u64 pat;
-
-	for (i = 0; i < 8; i++)
-		mtrr2protval[i] = MTRR2PROTVAL_INVALID;
-
-	/* Ignore the invalid MTRR types.  */
-	mtrr2protval[2] = 0;
-	mtrr2protval[3] = 0;
-
-	/*
-	 * Use host PAT value to figure out the mapping from guest MTRR
-	 * values to nested page table PAT/PCD/PWT values.  We do not
-	 * want to change the host PAT value every time we enter the
-	 * guest.
-	 */
-	rdmsrl(MSR_IA32_CR_PAT, pat);
-	for (i = 0; i < 8; i++) {
-		u8 mtrr = pat >> (8 * i);
-
-		if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
-			mtrr2protval[mtrr] = __cm_idx2pte(i);
-	}
-
-	for (i = 0; i < 8; i++) {
-		if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
-			u8 fallback = fallback_mtrr_type(i);
-			mtrr2protval[i] = mtrr2protval[fallback];
-			BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
-		}
-	}
-}
-
 static __init int svm_hardware_setup(void)
 {
 	int cpu;
@@ -990,7 +932,6 @@ static __init int svm_hardware_setup(void)
 	} else
 		kvm_disable_tdp();
 
-	build_mtrr2protval();
 	return 0;
 
 err:
@@ -1145,43 +1086,6 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 	return target_tsc - tsc;
 }
 
-static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
-{
-	struct kvm_vcpu *vcpu = &svm->vcpu;
-
-	/* Unlike Intel, AMD takes the guest's CR0.CD into account.
-	 *
-	 * AMD does not have IPAT.  To emulate it for the case of guests
-	 * with no assigned devices, just set everything to WB.  If guests
-	 * have assigned devices, however, we cannot force WB for RAM
-	 * pages only, so use the guest PAT directly.
-	 */
-	if (!kvm_arch_has_assigned_device(vcpu->kvm))
-		*g_pat = 0x0606060606060606;
-	else
-		*g_pat = vcpu->arch.pat;
-}
-
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
-	u8 mtrr;
-
-	/*
-	 * 1. MMIO: trust guest MTRR, so same as item 3.
-	 * 2. No passthrough: always map as WB, and force guest PAT to WB as well
-	 * 3. Passthrough: can't guarantee the result, try to trust guest.
-	 */
-	if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
-		return 0;
-
-	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) &&
-	    kvm_read_cr0(vcpu) & X86_CR0_CD)
-		return _PAGE_NOCACHE;
-
-	mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
-	return mtrr2protval[mtrr];
-}
-
 static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1278,7 +1182,6 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
 		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
 		save->g_pat = svm->vcpu.arch.pat;
-		svm_set_guest_pat(svm, &save->g_pat);
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -1673,10 +1576,13 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 	if (!vcpu->fpu_active)
 		cr0 |= X86_CR0_TS;
-
-	/* These are emulated via page tables.  */
-	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
-
+	/*
+	 * re-enable caching here because the QEMU bios
+	 * does not do it - this results in some delay at
+	 * reboot
+	 */
+	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+		cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	svm->vmcb->save.cr0 = cr0;
 	mark_dirty(svm->vmcb, VMCB_CR);
 	update_cr0_intercept(svm);
@@ -3351,16 +3257,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	case MSR_VM_IGNNE:
 		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
-	case MSR_IA32_CR_PAT:
-		if (npt_enabled) {
-			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
-				return 1;
-			vcpu->arch.pat = data;
-			svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
-			mark_dirty(svm->vmcb, VMCB_NPT);
-			break;
-		}
-		/* fall through */
 	default:
 		return kvm_set_msr_common(vcpu, msr);
 	}
@@ -4195,6 +4091,11 @@ static bool svm_has_high_real_mode_segbase(void)
 	return true;
 }
 
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	return 0;
+}
+
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64076740251e..06ef4908ba61 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8617,17 +8617,22 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	u64 ipat = 0;
 
 	/* For VT-d and EPT combination
-	 * 1. MMIO: guest may want to apply WC, trust it.
+	 * 1. MMIO: always map as UC
 	 * 2. EPT with VT-d:
 	 *   a. VT-d without snooping control feature: can't guarantee the
-	 *      result, try to trust guest.  So the same as item 1.
+	 *      result, try to trust guest.
 	 *   b. VT-d with snooping control feature: snooping control feature of
 	 *      VT-d engine can guarantee the cache correctness. Just set it
 	 *      to WB to keep consistent with host. So the same as item 3.
 	 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
 	 *    consistent with host MTRR
 	 */
-	if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+	if (is_mmio) {
+		cache = MTRR_TYPE_UNCACHABLE;
+		goto exit;
+	}
+
+	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
 		ipat = VMX_EPT_IPAT_BIT;
 		cache = MTRR_TYPE_WRBACK;
 		goto exit;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 991466bf8dee..92511d4b7236 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1708,8 +1708,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		vcpu->pvclock_set_guest_stopped_request = false;
 	}
 
-	pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO;
-
 	/* If the host uses TSC clocksource, then it is stable */
 	if (use_master_clock)
 		pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
@@ -2007,8 +2005,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			 &vcpu->requests);
 
 		ka->boot_vcpu_runs_old_kvmclock = tmp;
-
-		ka->kvmclock_offset = -get_kernel_ns();
 	}
 
 	vcpu->arch.time = data;