diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-12 16:19:15 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-12 16:19:15 -0500 |
commit | 8c5bd25bf42effd194d4b0b43895c42b374e620b (patch) | |
tree | 6ab4ef4919d52a4cdde9c2506766371a5a894b93 | |
parent | eb094f06963bb0fd8134c6a9b805d4ad0002a7d4 (diff) | |
parent | a78986aae9b2988f8493f9f65a587ee433e83bc3 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
"Fix unwinding of KVM_CREATE_VM failure, VT-d posted interrupts,
DAX/ZONE_DEVICE, and module unload/reload"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: MMU: Do not treat ZONE_DEVICE pages as being reserved
KVM: VMX: Introduce pi_is_pir_empty() helper
KVM: VMX: Do not change PID.NDST when loading a blocked vCPU
KVM: VMX: Consider PID.PIR to determine if vCPU has pending interrupts
KVM: VMX: Fix comment to specify PID.ON instead of PIR.ON
KVM: X86: Fix initialization of MSR lists
KVM: fix placement of refcount initialization
KVM: Fix NULL-ptr deref after kvm_create_vm fails
-rw-r--r-- | arch/x86/kvm/mmu.c | 8 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 23 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 11 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 56 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 1 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 48 |
6 files changed, 96 insertions, 51 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 529589a42afb..fd6012eef9c9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -3393,7 +3393,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, | |||
3393 | * here. | 3393 | * here. |
3394 | */ | 3394 | */ |
3395 | if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && | 3395 | if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && |
3396 | level == PT_PAGE_TABLE_LEVEL && | 3396 | !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && |
3397 | PageTransCompoundMap(pfn_to_page(pfn)) && | 3397 | PageTransCompoundMap(pfn_to_page(pfn)) && |
3398 | !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { | 3398 | !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { |
3399 | unsigned long mask; | 3399 | unsigned long mask; |
@@ -6009,9 +6009,9 @@ restart: | |||
6009 | * the guest, and the guest page table is using 4K page size | 6009 | * the guest, and the guest page table is using 4K page size |
6010 | * mapping if the indirect sp has level = 1. | 6010 | * mapping if the indirect sp has level = 1. |
6011 | */ | 6011 | */ |
6012 | if (sp->role.direct && | 6012 | if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && |
6013 | !kvm_is_reserved_pfn(pfn) && | 6013 | !kvm_is_zone_device_pfn(pfn) && |
6014 | PageTransCompoundMap(pfn_to_page(pfn))) { | 6014 | PageTransCompoundMap(pfn_to_page(pfn))) { |
6015 | pte_list_remove(rmap_head, sptep); | 6015 | pte_list_remove(rmap_head, sptep); |
6016 | 6016 | ||
6017 | if (kvm_available_flush_tlb_with_range()) | 6017 | if (kvm_available_flush_tlb_with_range()) |
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5d21a4ab28cf..04a8212704c1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c | |||
@@ -1268,6 +1268,18 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) | |||
1268 | if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) | 1268 | if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) |
1269 | return; | 1269 | return; |
1270 | 1270 | ||
1271 | /* | ||
1272 | * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change | ||
1273 | * PID.NDST: pi_post_block is the one expected to change PID.NDST and the | ||
1274 | * wakeup handler expects the vCPU to be on the blocked_vcpu_list that | ||
1275 | * matches PID.NDST. Otherwise, a vcpu may not be able to be woken up | ||
1276 | * correctly. | ||
1277 | */ | ||
1278 | if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) { | ||
1279 | pi_clear_sn(pi_desc); | ||
1280 | goto after_clear_sn; | ||
1281 | } | ||
1282 | |||
1271 | /* The full case. */ | 1283 | /* The full case. */ |
1272 | do { | 1284 | do { |
1273 | old.control = new.control = pi_desc->control; | 1285 | old.control = new.control = pi_desc->control; |
@@ -1283,6 +1295,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) | |||
1283 | } while (cmpxchg64(&pi_desc->control, old.control, | 1295 | } while (cmpxchg64(&pi_desc->control, old.control, |
1284 | new.control) != old.control); | 1296 | new.control) != old.control); |
1285 | 1297 | ||
1298 | after_clear_sn: | ||
1299 | |||
1286 | /* | 1300 | /* |
1287 | * Clear SN before reading the bitmap. The VT-d firmware | 1301 | * Clear SN before reading the bitmap. The VT-d firmware |
1288 | * writes the bitmap and reads SN atomically (5.2.3 in the | 1302 | * writes the bitmap and reads SN atomically (5.2.3 in the |
@@ -1291,7 +1305,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) | |||
1291 | */ | 1305 | */ |
1292 | smp_mb__after_atomic(); | 1306 | smp_mb__after_atomic(); |
1293 | 1307 | ||
1294 | if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS)) | 1308 | if (!pi_is_pir_empty(pi_desc)) |
1295 | pi_set_on(pi_desc); | 1309 | pi_set_on(pi_desc); |
1296 | } | 1310 | } |
1297 | 1311 | ||
@@ -6137,7 +6151,7 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) | |||
6137 | if (pi_test_on(&vmx->pi_desc)) { | 6151 | if (pi_test_on(&vmx->pi_desc)) { |
6138 | pi_clear_on(&vmx->pi_desc); | 6152 | pi_clear_on(&vmx->pi_desc); |
6139 | /* | 6153 | /* |
6140 | * IOMMU can write to PIR.ON, so the barrier matters even on UP. | 6154 | * IOMMU can write to PID.ON, so the barrier matters even on UP. |
6141 | * But on x86 this is just a compiler barrier anyway. | 6155 | * But on x86 this is just a compiler barrier anyway. |
6142 | */ | 6156 | */ |
6143 | smp_mb__after_atomic(); | 6157 | smp_mb__after_atomic(); |
@@ -6167,7 +6181,10 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) | |||
6167 | 6181 | ||
6168 | static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) | 6182 | static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) |
6169 | { | 6183 | { |
6170 | return pi_test_on(vcpu_to_pi_desc(vcpu)); | 6184 | struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); |
6185 | |||
6186 | return pi_test_on(pi_desc) || | ||
6187 | (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc)); | ||
6171 | } | 6188 | } |
6172 | 6189 | ||
6173 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 6190 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) |
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index bee16687dc0b..5a0f34b1e226 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h | |||
@@ -355,6 +355,11 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) | |||
355 | return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); | 355 | return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); |
356 | } | 356 | } |
357 | 357 | ||
358 | static inline bool pi_is_pir_empty(struct pi_desc *pi_desc) | ||
359 | { | ||
360 | return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS); | ||
361 | } | ||
362 | |||
358 | static inline void pi_set_sn(struct pi_desc *pi_desc) | 363 | static inline void pi_set_sn(struct pi_desc *pi_desc) |
359 | { | 364 | { |
360 | set_bit(POSTED_INTR_SN, | 365 | set_bit(POSTED_INTR_SN, |
@@ -373,6 +378,12 @@ static inline void pi_clear_on(struct pi_desc *pi_desc) | |||
373 | (unsigned long *)&pi_desc->control); | 378 | (unsigned long *)&pi_desc->control); |
374 | } | 379 | } |
375 | 380 | ||
381 | static inline void pi_clear_sn(struct pi_desc *pi_desc) | ||
382 | { | ||
383 | clear_bit(POSTED_INTR_SN, | ||
384 | (unsigned long *)&pi_desc->control); | ||
385 | } | ||
386 | |||
376 | static inline int pi_test_on(struct pi_desc *pi_desc) | 387 | static inline int pi_test_on(struct pi_desc *pi_desc) |
377 | { | 388 | { |
378 | return test_bit(POSTED_INTR_ON, | 389 | return test_bit(POSTED_INTR_ON, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a30e9962a6ef..7db5c8ef35dd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -1133,13 +1133,15 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); | |||
1133 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS | 1133 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS |
1134 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. | 1134 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. |
1135 | * | 1135 | * |
1136 | * This list is modified at module load time to reflect the | 1136 | * The three MSR lists (msrs_to_save, emulated_msrs, msr_based_features) |
1137 | * extract the supported MSRs from the related const lists. | ||
1138 | * msrs_to_save is selected from msrs_to_save_all to reflect the | ||
1137 | * capabilities of the host cpu. This capabilities test skips MSRs that are | 1139 | * capabilities of the host cpu. This capabilities test skips MSRs that are |
1138 | * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs | 1140 | * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs |
1139 | * may depend on host virtualization features rather than host cpu features. | 1141 | * may depend on host virtualization features rather than host cpu features. |
1140 | */ | 1142 | */ |
1141 | 1143 | ||
1142 | static u32 msrs_to_save[] = { | 1144 | static const u32 msrs_to_save_all[] = { |
1143 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 1145 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
1144 | MSR_STAR, | 1146 | MSR_STAR, |
1145 | #ifdef CONFIG_X86_64 | 1147 | #ifdef CONFIG_X86_64 |
@@ -1180,9 +1182,10 @@ static u32 msrs_to_save[] = { | |||
1180 | MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, | 1182 | MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, |
1181 | }; | 1183 | }; |
1182 | 1184 | ||
1185 | static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)]; | ||
1183 | static unsigned num_msrs_to_save; | 1186 | static unsigned num_msrs_to_save; |
1184 | 1187 | ||
1185 | static u32 emulated_msrs[] = { | 1188 | static const u32 emulated_msrs_all[] = { |
1186 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 1189 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
1187 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 1190 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
1188 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 1191 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
@@ -1221,7 +1224,7 @@ static u32 emulated_msrs[] = { | |||
1221 | * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs. | 1224 | * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs. |
1222 | * We always support the "true" VMX control MSRs, even if the host | 1225 | * We always support the "true" VMX control MSRs, even if the host |
1223 | * processor does not, so I am putting these registers here rather | 1226 | * processor does not, so I am putting these registers here rather |
1224 | * than in msrs_to_save. | 1227 | * than in msrs_to_save_all. |
1225 | */ | 1228 | */ |
1226 | MSR_IA32_VMX_BASIC, | 1229 | MSR_IA32_VMX_BASIC, |
1227 | MSR_IA32_VMX_TRUE_PINBASED_CTLS, | 1230 | MSR_IA32_VMX_TRUE_PINBASED_CTLS, |
@@ -1240,13 +1243,14 @@ static u32 emulated_msrs[] = { | |||
1240 | MSR_KVM_POLL_CONTROL, | 1243 | MSR_KVM_POLL_CONTROL, |
1241 | }; | 1244 | }; |
1242 | 1245 | ||
1246 | static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)]; | ||
1243 | static unsigned num_emulated_msrs; | 1247 | static unsigned num_emulated_msrs; |
1244 | 1248 | ||
1245 | /* | 1249 | /* |
1246 | * List of msr numbers which are used to expose MSR-based features that | 1250 | * List of msr numbers which are used to expose MSR-based features that |
1247 | * can be used by a hypervisor to validate requested CPU features. | 1251 | * can be used by a hypervisor to validate requested CPU features. |
1248 | */ | 1252 | */ |
1249 | static u32 msr_based_features[] = { | 1253 | static const u32 msr_based_features_all[] = { |
1250 | MSR_IA32_VMX_BASIC, | 1254 | MSR_IA32_VMX_BASIC, |
1251 | MSR_IA32_VMX_TRUE_PINBASED_CTLS, | 1255 | MSR_IA32_VMX_TRUE_PINBASED_CTLS, |
1252 | MSR_IA32_VMX_PINBASED_CTLS, | 1256 | MSR_IA32_VMX_PINBASED_CTLS, |
@@ -1271,6 +1275,7 @@ static u32 msr_based_features[] = { | |||
1271 | MSR_IA32_ARCH_CAPABILITIES, | 1275 | MSR_IA32_ARCH_CAPABILITIES, |
1272 | }; | 1276 | }; |
1273 | 1277 | ||
1278 | static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)]; | ||
1274 | static unsigned int num_msr_based_features; | 1279 | static unsigned int num_msr_based_features; |
1275 | 1280 | ||
1276 | static u64 kvm_get_arch_capabilities(void) | 1281 | static u64 kvm_get_arch_capabilities(void) |
@@ -5118,22 +5123,22 @@ static void kvm_init_msr_list(void) | |||
5118 | { | 5123 | { |
5119 | struct x86_pmu_capability x86_pmu; | 5124 | struct x86_pmu_capability x86_pmu; |
5120 | u32 dummy[2]; | 5125 | u32 dummy[2]; |
5121 | unsigned i, j; | 5126 | unsigned i; |
5122 | 5127 | ||
5123 | BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4, | 5128 | BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4, |
5124 | "Please update the fixed PMCs in msrs_to_save[]"); | 5129 | "Please update the fixed PMCs in msrs_to_saved_all[]"); |
5125 | 5130 | ||
5126 | perf_get_x86_pmu_capability(&x86_pmu); | 5131 | perf_get_x86_pmu_capability(&x86_pmu); |
5127 | 5132 | ||
5128 | for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { | 5133 | for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) { |
5129 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) | 5134 | if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0) |
5130 | continue; | 5135 | continue; |
5131 | 5136 | ||
5132 | /* | 5137 | /* |
5133 | * Even MSRs that are valid in the host may not be exposed | 5138 | * Even MSRs that are valid in the host may not be exposed |
5134 | * to the guests in some cases. | 5139 | * to the guests in some cases. |
5135 | */ | 5140 | */ |
5136 | switch (msrs_to_save[i]) { | 5141 | switch (msrs_to_save_all[i]) { |
5137 | case MSR_IA32_BNDCFGS: | 5142 | case MSR_IA32_BNDCFGS: |
5138 | if (!kvm_mpx_supported()) | 5143 | if (!kvm_mpx_supported()) |
5139 | continue; | 5144 | continue; |
@@ -5161,17 +5166,17 @@ static void kvm_init_msr_list(void) | |||
5161 | break; | 5166 | break; |
5162 | case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: { | 5167 | case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: { |
5163 | if (!kvm_x86_ops->pt_supported() || | 5168 | if (!kvm_x86_ops->pt_supported() || |
5164 | msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >= | 5169 | msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >= |
5165 | intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) | 5170 | intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) |
5166 | continue; | 5171 | continue; |
5167 | break; | 5172 | break; |
5168 | case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17: | 5173 | case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17: |
5169 | if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >= | 5174 | if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >= |
5170 | min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) | 5175 | min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) |
5171 | continue; | 5176 | continue; |
5172 | break; | 5177 | break; |
5173 | case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17: | 5178 | case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17: |
5174 | if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= | 5179 | if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= |
5175 | min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) | 5180 | min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) |
5176 | continue; | 5181 | continue; |
5177 | } | 5182 | } |
@@ -5179,34 +5184,25 @@ static void kvm_init_msr_list(void) | |||
5179 | break; | 5184 | break; |
5180 | } | 5185 | } |
5181 | 5186 | ||
5182 | if (j < i) | 5187 | msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i]; |
5183 | msrs_to_save[j] = msrs_to_save[i]; | ||
5184 | j++; | ||
5185 | } | 5188 | } |
5186 | num_msrs_to_save = j; | ||
5187 | 5189 | ||
5188 | for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { | 5190 | for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) { |
5189 | if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) | 5191 | if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i])) |
5190 | continue; | 5192 | continue; |
5191 | 5193 | ||
5192 | if (j < i) | 5194 | emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i]; |
5193 | emulated_msrs[j] = emulated_msrs[i]; | ||
5194 | j++; | ||
5195 | } | 5195 | } |
5196 | num_emulated_msrs = j; | ||
5197 | 5196 | ||
5198 | for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) { | 5197 | for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) { |
5199 | struct kvm_msr_entry msr; | 5198 | struct kvm_msr_entry msr; |
5200 | 5199 | ||
5201 | msr.index = msr_based_features[i]; | 5200 | msr.index = msr_based_features_all[i]; |
5202 | if (kvm_get_msr_feature(&msr)) | 5201 | if (kvm_get_msr_feature(&msr)) |
5203 | continue; | 5202 | continue; |
5204 | 5203 | ||
5205 | if (j < i) | 5204 | msr_based_features[num_msr_based_features++] = msr_based_features_all[i]; |
5206 | msr_based_features[j] = msr_based_features[i]; | ||
5207 | j++; | ||
5208 | } | 5205 | } |
5209 | num_msr_based_features = j; | ||
5210 | } | 5206 | } |
5211 | 5207 | ||
5212 | static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, | 5208 | static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 52ed5f66e8f9..d41c521a39da 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -966,6 +966,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); | |||
966 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); | 966 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); |
967 | 967 | ||
968 | bool kvm_is_reserved_pfn(kvm_pfn_t pfn); | 968 | bool kvm_is_reserved_pfn(kvm_pfn_t pfn); |
969 | bool kvm_is_zone_device_pfn(kvm_pfn_t pfn); | ||
969 | 970 | ||
970 | struct kvm_irq_ack_notifier { | 971 | struct kvm_irq_ack_notifier { |
971 | struct hlist_node link; | 972 | struct hlist_node link; |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4aab3547a165..524cff24a68d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -150,10 +150,30 @@ __weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, | |||
150 | return 0; | 150 | return 0; |
151 | } | 151 | } |
152 | 152 | ||
153 | bool kvm_is_zone_device_pfn(kvm_pfn_t pfn) | ||
154 | { | ||
155 | /* | ||
156 | * The metadata used by is_zone_device_page() to determine whether or | ||
157 | * not a page is ZONE_DEVICE is guaranteed to be valid if and only if | ||
158 | * the device has been pinned, e.g. by get_user_pages(). WARN if the | ||
159 | * page_count() is zero to help detect bad usage of this helper. | ||
160 | */ | ||
161 | if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn)))) | ||
162 | return false; | ||
163 | |||
164 | return is_zone_device_page(pfn_to_page(pfn)); | ||
165 | } | ||
166 | |||
153 | bool kvm_is_reserved_pfn(kvm_pfn_t pfn) | 167 | bool kvm_is_reserved_pfn(kvm_pfn_t pfn) |
154 | { | 168 | { |
169 | /* | ||
170 | * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting | ||
171 | * perspective they are "normal" pages, albeit with slightly different | ||
172 | * usage rules. | ||
173 | */ | ||
155 | if (pfn_valid(pfn)) | 174 | if (pfn_valid(pfn)) |
156 | return PageReserved(pfn_to_page(pfn)); | 175 | return PageReserved(pfn_to_page(pfn)) && |
176 | !kvm_is_zone_device_pfn(pfn); | ||
157 | 177 | ||
158 | return true; | 178 | return true; |
159 | } | 179 | } |
@@ -663,6 +683,12 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
663 | 683 | ||
664 | BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); | 684 | BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); |
665 | 685 | ||
686 | if (init_srcu_struct(&kvm->srcu)) | ||
687 | goto out_err_no_srcu; | ||
688 | if (init_srcu_struct(&kvm->irq_srcu)) | ||
689 | goto out_err_no_irq_srcu; | ||
690 | |||
691 | refcount_set(&kvm->users_count, 1); | ||
666 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { | 692 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { |
667 | struct kvm_memslots *slots = kvm_alloc_memslots(); | 693 | struct kvm_memslots *slots = kvm_alloc_memslots(); |
668 | 694 | ||
@@ -680,7 +706,6 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
680 | goto out_err_no_arch_destroy_vm; | 706 | goto out_err_no_arch_destroy_vm; |
681 | } | 707 | } |
682 | 708 | ||
683 | refcount_set(&kvm->users_count, 1); | ||
684 | r = kvm_arch_init_vm(kvm, type); | 709 | r = kvm_arch_init_vm(kvm, type); |
685 | if (r) | 710 | if (r) |
686 | goto out_err_no_arch_destroy_vm; | 711 | goto out_err_no_arch_destroy_vm; |
@@ -693,11 +718,6 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
693 | INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); | 718 | INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); |
694 | #endif | 719 | #endif |
695 | 720 | ||
696 | if (init_srcu_struct(&kvm->srcu)) | ||
697 | goto out_err_no_srcu; | ||
698 | if (init_srcu_struct(&kvm->irq_srcu)) | ||
699 | goto out_err_no_irq_srcu; | ||
700 | |||
701 | r = kvm_init_mmu_notifier(kvm); | 721 | r = kvm_init_mmu_notifier(kvm); |
702 | if (r) | 722 | if (r) |
703 | goto out_err_no_mmu_notifier; | 723 | goto out_err_no_mmu_notifier; |
@@ -720,19 +740,19 @@ out_err: | |||
720 | mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); | 740 | mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); |
721 | #endif | 741 | #endif |
722 | out_err_no_mmu_notifier: | 742 | out_err_no_mmu_notifier: |
723 | cleanup_srcu_struct(&kvm->irq_srcu); | ||
724 | out_err_no_irq_srcu: | ||
725 | cleanup_srcu_struct(&kvm->srcu); | ||
726 | out_err_no_srcu: | ||
727 | hardware_disable_all(); | 743 | hardware_disable_all(); |
728 | out_err_no_disable: | 744 | out_err_no_disable: |
729 | kvm_arch_destroy_vm(kvm); | 745 | kvm_arch_destroy_vm(kvm); |
730 | WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); | ||
731 | out_err_no_arch_destroy_vm: | 746 | out_err_no_arch_destroy_vm: |
747 | WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); | ||
732 | for (i = 0; i < KVM_NR_BUSES; i++) | 748 | for (i = 0; i < KVM_NR_BUSES; i++) |
733 | kfree(kvm_get_bus(kvm, i)); | 749 | kfree(kvm_get_bus(kvm, i)); |
734 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) | 750 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) |
735 | kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); | 751 | kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); |
752 | cleanup_srcu_struct(&kvm->irq_srcu); | ||
753 | out_err_no_irq_srcu: | ||
754 | cleanup_srcu_struct(&kvm->srcu); | ||
755 | out_err_no_srcu: | ||
736 | kvm_arch_free_vm(kvm); | 756 | kvm_arch_free_vm(kvm); |
737 | mmdrop(current->mm); | 757 | mmdrop(current->mm); |
738 | return ERR_PTR(r); | 758 | return ERR_PTR(r); |
@@ -1886,7 +1906,7 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); | |||
1886 | 1906 | ||
1887 | void kvm_set_pfn_dirty(kvm_pfn_t pfn) | 1907 | void kvm_set_pfn_dirty(kvm_pfn_t pfn) |
1888 | { | 1908 | { |
1889 | if (!kvm_is_reserved_pfn(pfn)) { | 1909 | if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) { |
1890 | struct page *page = pfn_to_page(pfn); | 1910 | struct page *page = pfn_to_page(pfn); |
1891 | 1911 | ||
1892 | SetPageDirty(page); | 1912 | SetPageDirty(page); |
@@ -1896,7 +1916,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); | |||
1896 | 1916 | ||
1897 | void kvm_set_pfn_accessed(kvm_pfn_t pfn) | 1917 | void kvm_set_pfn_accessed(kvm_pfn_t pfn) |
1898 | { | 1918 | { |
1899 | if (!kvm_is_reserved_pfn(pfn)) | 1919 | if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) |
1900 | mark_page_accessed(pfn_to_page(pfn)); | 1920 | mark_page_accessed(pfn_to_page(pfn)); |
1901 | } | 1921 | } |
1902 | EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); | 1922 | EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); |