author     Linus Torvalds <torvalds@linux-foundation.org>  2019-11-12 16:19:15 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-11-12 16:19:15 -0500
commit     8c5bd25bf42effd194d4b0b43895c42b374e620b (patch)
tree       6ab4ef4919d52a4cdde9c2506766371a5a894b93
parent     eb094f06963bb0fd8134c6a9b805d4ad0002a7d4 (diff)
parent     a78986aae9b2988f8493f9f65a587ee433e83bc3 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "Fix unwinding of KVM_CREATE_VM failure, VT-d posted interrupts,
  DAX/ZONE_DEVICE, and module unload/reload"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: MMU: Do not treat ZONE_DEVICE pages as being reserved
  KVM: VMX: Introduce pi_is_pir_empty() helper
  KVM: VMX: Do not change PID.NDST when loading a blocked vCPU
  KVM: VMX: Consider PID.PIR to determine if vCPU has pending interrupts
  KVM: VMX: Fix comment to specify PID.ON instead of PIR.ON
  KVM: X86: Fix initialization of MSR lists
  KVM: fix placement of refcount initialization
  KVM: Fix NULL-ptr deref after kvm_create_vm fails
-rw-r--r--  arch/x86/kvm/mmu.c        |  8
-rw-r--r--  arch/x86/kvm/vmx/vmx.c    | 23
-rw-r--r--  arch/x86/kvm/vmx/vmx.h    | 11
-rw-r--r--  arch/x86/kvm/x86.c        | 56
-rw-r--r--  include/linux/kvm_host.h  |  1
-rw-r--r--  virt/kvm/kvm_main.c       | 48
6 files changed, 96 insertions(+), 51 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 529589a42afb..fd6012eef9c9 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3393,7 +3393,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
          * here.
          */
         if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
-            level == PT_PAGE_TABLE_LEVEL &&
+            !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
             PageTransCompoundMap(pfn_to_page(pfn)) &&
             !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
                 unsigned long mask;
@@ -6009,9 +6009,9 @@ restart:
                  * the guest, and the guest page table is using 4K page size
                  * mapping if the indirect sp has level = 1.
                  */
-                if (sp->role.direct &&
-                    !kvm_is_reserved_pfn(pfn) &&
+                if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
+                    !kvm_is_zone_device_pfn(pfn) &&
                     PageTransCompoundMap(pfn_to_page(pfn))) {
                         pte_list_remove(rmap_head, sptep);

                         if (kvm_available_flush_tlb_with_range())
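
Both mmu.c hunks add the same guard: a pfn is only considered for transparent-hugepage adjustment (or huge-page zapping) if it is not a ZONE_DEVICE pfn, since compound-page metadata is not meaningful for device pages. Reduced to a single predicate, the condition the two hunks converge on looks roughly like this (hypothetical helper for illustration only; the kernel keeps the conditions inline as shown above):

    /*
     * Illustrative sketch, not kernel code: a pfn is eligible for the
     * transparent-hugepage paths only if it maps a real, refcounted,
     * non-device page.
     */
    static bool pfn_eligible_for_thp_adjust(kvm_pfn_t pfn)
    {
            return !is_error_noslot_pfn(pfn) &&
                   !kvm_is_reserved_pfn(pfn) &&
                   !kvm_is_zone_device_pfn(pfn) &&      /* the new check */
                   PageTransCompoundMap(pfn_to_page(pfn));
    }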
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5d21a4ab28cf..04a8212704c1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1268,6 +1268,18 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
         if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
                 return;

+        /*
+         * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+         * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
+         * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
+         * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
+         * correctly.
+         */
+        if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
+                pi_clear_sn(pi_desc);
+                goto after_clear_sn;
+        }
+
         /* The full case. */
         do {
                 old.control = new.control = pi_desc->control;
@@ -1283,6 +1295,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
         } while (cmpxchg64(&pi_desc->control, old.control,
                            new.control) != old.control);

+after_clear_sn:
+
         /*
          * Clear SN before reading the bitmap.  The VT-d firmware
          * writes the bitmap and reads SN atomically (5.2.3 in the
@@ -1291,7 +1305,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
          */
         smp_mb__after_atomic();

-        if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS))
+        if (!pi_is_pir_empty(pi_desc))
                 pi_set_on(pi_desc);
 }

@@ -6137,7 +6151,7 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
         if (pi_test_on(&vmx->pi_desc)) {
                 pi_clear_on(&vmx->pi_desc);
                 /*
-                 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
+                 * IOMMU can write to PID.ON, so the barrier matters even on UP.
                  * But on x86 this is just a compiler barrier anyway.
                  */
                 smp_mb__after_atomic();
@@ -6167,7 +6181,10 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)

 static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
 {
-        return pi_test_on(vcpu_to_pi_desc(vcpu));
+        struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+        return pi_test_on(pi_desc) ||
+                (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
 }

 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
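
The PID.ON, PID.SN, PID.NDST and PID.PIR names used above refer to fields of the VT-d posted-interrupt descriptor defined in vmx.h. For reference, its layout is roughly the following (simplified sketch; exact reserved-bit comments omitted):

    /* Posted-interrupt descriptor (PID); 64-byte aligned so the CPU and
     * the IOMMU can update it atomically. Simplified sketch. */
    struct pi_desc {
            u32 pir[8];             /* PIR: one request bit per vector (256) */
            union {
                    struct {
                            u16 on : 1,     /* ON: notification outstanding */
                                sn : 1,     /* SN: suppress notifications */
                                rsvd_1 : 14;
                            u8  nv;         /* NV: notification vector */
                            u8  rsvd_2;
                            u32 ndst;       /* NDST: notification destination */
                    };
                    u64 control;    /* ON/SN/NV/NDST as one 64-bit word */
            };
            u32 rsvd[6];
    } __attribute__((aligned(64)));

With that layout in mind, the hunks above read naturally: a blocked vCPU's NDST is owned by the block/wakeup path, and a vCPU with SN set can still have pending work recorded in PIR even though ON never gets set.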
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index bee16687dc0b..5a0f34b1e226 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -355,6 +355,11 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
         return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }

+static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
+{
+        return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
+}
+
 static inline void pi_set_sn(struct pi_desc *pi_desc)
 {
         set_bit(POSTED_INTR_SN,
@@ -373,6 +378,12 @@ static inline void pi_clear_on(struct pi_desc *pi_desc)
                 (unsigned long *)&pi_desc->control);
 }

+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+        clear_bit(POSTED_INTR_SN,
+                  (unsigned long *)&pi_desc->control);
+}
+
 static inline int pi_test_on(struct pi_desc *pi_desc)
 {
         return test_bit(POSTED_INTR_ON,
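
pi_clear_sn() mirrors the existing pi_set_sn()/pi_clear_on() accessors, and pi_is_pir_empty() gives the PIR scan a name so both call sites read the same way. A sketch of the predicate these helpers combine into (hypothetical standalone function; the patch open-codes it in vmx_dy_apicv_has_pending_interrupt() above):

    /* Does this vCPU have a posted interrupt that needs attention?
     * ON set: a notification is already outstanding.
     * SN set: notifications are suppressed, so PIR itself must be
     * scanned, which is what pi_is_pir_empty() now names. */
    static inline bool pi_has_pending_interrupt(struct pi_desc *pi_desc)
    {
            return pi_test_on(pi_desc) ||
                   (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
    }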
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a30e9962a6ef..7db5c8ef35dd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1133,13 +1133,15 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
  *
- * This list is modified at module load time to reflect the
+ * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features)
+ * extract the supported MSRs from the related const lists.
+ * msrs_to_save is selected from the msrs_to_save_all to reflect the
  * capabilities of the host cpu. This capabilities test skips MSRs that are
- * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs
+ * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs
  * may depend on host virtualization features rather than host cpu features.
  */

-static u32 msrs_to_save[] = {
+static const u32 msrs_to_save_all[] = {
         MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
         MSR_STAR,
 #ifdef CONFIG_X86_64
@@ -1180,9 +1182,10 @@ static u32 msrs_to_save[] = {
         MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
 };

+static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
 static unsigned num_msrs_to_save;

-static u32 emulated_msrs[] = {
+static const u32 emulated_msrs_all[] = {
         MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
         MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
         HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
@@ -1221,7 +1224,7 @@ static u32 emulated_msrs[] = {
          * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
          * We always support the "true" VMX control MSRs, even if the host
          * processor does not, so I am putting these registers here rather
-         * than in msrs_to_save.
+         * than in msrs_to_save_all.
          */
         MSR_IA32_VMX_BASIC,
         MSR_IA32_VMX_TRUE_PINBASED_CTLS,
@@ -1240,13 +1243,14 @@ static u32 emulated_msrs[] = {
         MSR_KVM_POLL_CONTROL,
 };

+static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
 static unsigned num_emulated_msrs;

 /*
  * List of msr numbers which are used to expose MSR-based features that
  * can be used by a hypervisor to validate requested CPU features.
  */
-static u32 msr_based_features[] = {
+static const u32 msr_based_features_all[] = {
         MSR_IA32_VMX_BASIC,
         MSR_IA32_VMX_TRUE_PINBASED_CTLS,
         MSR_IA32_VMX_PINBASED_CTLS,
@@ -1271,6 +1275,7 @@ static u32 msr_based_features[] = {
         MSR_IA32_ARCH_CAPABILITIES,
 };

+static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
 static unsigned int num_msr_based_features;

 static u64 kvm_get_arch_capabilities(void)
@@ -5118,22 +5123,22 @@ static void kvm_init_msr_list(void)
 {
         struct x86_pmu_capability x86_pmu;
         u32 dummy[2];
-        unsigned i, j;
+        unsigned i;

         BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
-                         "Please update the fixed PMCs in msrs_to_save[]");
+                         "Please update the fixed PMCs in msrs_to_saved_all[]");

         perf_get_x86_pmu_capability(&x86_pmu);

-        for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
-                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
+        for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
+                if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
                         continue;

                 /*
                  * Even MSRs that are valid in the host may not be exposed
                  * to the guests in some cases.
                  */
-                switch (msrs_to_save[i]) {
+                switch (msrs_to_save_all[i]) {
                 case MSR_IA32_BNDCFGS:
                         if (!kvm_mpx_supported())
                                 continue;
@@ -5161,17 +5166,17 @@ static void kvm_init_msr_list(void)
                         break;
                 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
                         if (!kvm_x86_ops->pt_supported() ||
-                                msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
+                                msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
                                 intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
                                 continue;
                         break;
                 case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
-                        if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
+                        if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
                             min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
                                 continue;
                         break;
                 case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
-                        if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
+                        if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
                             min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
                                 continue;
                 }
@@ -5179,34 +5184,25 @@ static void kvm_init_msr_list(void)
                         break;
                 }

-                if (j < i)
-                        msrs_to_save[j] = msrs_to_save[i];
-                j++;
+                msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
         }
-        num_msrs_to_save = j;

-        for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
-                if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
+        for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
+                if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i]))
                         continue;

-                if (j < i)
-                        emulated_msrs[j] = emulated_msrs[i];
-                j++;
+                emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
         }
-        num_emulated_msrs = j;

-        for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+        for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
                 struct kvm_msr_entry msr;

-                msr.index = msr_based_features[i];
+                msr.index = msr_based_features_all[i];
                 if (kvm_get_msr_feature(&msr))
                         continue;

-                if (j < i)
-                        msr_based_features[j] = msr_based_features[i];
-                j++;
+                msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
         }
-        num_msr_based_features = j;
 }

 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
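
The x86.c change fixes module reload: the old code compacted msrs_to_save[], emulated_msrs[] and msr_based_features[] in place, so after kvm.ko was unloaded and reloaded the filter ran over an already-truncated list and entries were lost for good. Keeping the full lists as const *_all[] sources and copying the survivors into separate arrays makes kvm_init_msr_list() idempotent. The pattern in miniature (self-contained sketch with stand-in names, not kernel code):

    #include <stddef.h>

    /* Sketch of the fix: filter from an immutable source into a
     * destination array, instead of compacting the source in place. */
    static const unsigned int src_all[] = { 1, 2, 3, 4, 5 };
    static unsigned int dst[sizeof(src_all) / sizeof(src_all[0])];
    static size_t num_dst;

    static int keep(unsigned int v) { return v % 2; } /* stand-in filter */

    static void init_list(void)
    {
            size_t i;

            num_dst = 0;            /* safe to run any number of times */
            for (i = 0; i < sizeof(src_all) / sizeof(src_all[0]); i++)
                    if (keep(src_all[i]))
                            dst[num_dst++] = src_all[i];
    }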
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 52ed5f66e8f9..d41c521a39da 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -966,6 +966,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);

 bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);

 struct kvm_irq_ack_notifier {
         struct hlist_node link;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4aab3547a165..524cff24a68d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -150,10 +150,30 @@ __weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
         return 0;
 }

+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+        /*
+         * The metadata used by is_zone_device_page() to determine whether or
+         * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+         * the device has been pinned, e.g. by get_user_pages().  WARN if the
+         * page_count() is zero to help detect bad usage of this helper.
+         */
+        if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+                return false;
+
+        return is_zone_device_page(pfn_to_page(pfn));
+}
+
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 {
+        /*
+         * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+         * perspective they are "normal" pages, albeit with slightly different
+         * usage rules.
+         */
         if (pfn_valid(pfn))
-                return PageReserved(pfn_to_page(pfn));
+                return PageReserved(pfn_to_page(pfn)) &&
+                       !kvm_is_zone_device_pfn(pfn);

         return true;
 }
@@ -663,6 +683,12 @@ static struct kvm *kvm_create_vm(unsigned long type)

         BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);

+        if (init_srcu_struct(&kvm->srcu))
+                goto out_err_no_srcu;
+        if (init_srcu_struct(&kvm->irq_srcu))
+                goto out_err_no_irq_srcu;
+
+        refcount_set(&kvm->users_count, 1);
         for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
                 struct kvm_memslots *slots = kvm_alloc_memslots();

@@ -680,7 +706,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
                 goto out_err_no_arch_destroy_vm;
         }

-        refcount_set(&kvm->users_count, 1);
         r = kvm_arch_init_vm(kvm, type);
         if (r)
                 goto out_err_no_arch_destroy_vm;
@@ -693,11 +718,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
         INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif

-        if (init_srcu_struct(&kvm->srcu))
-                goto out_err_no_srcu;
-        if (init_srcu_struct(&kvm->irq_srcu))
-                goto out_err_no_irq_srcu;
-
         r = kvm_init_mmu_notifier(kvm);
         if (r)
                 goto out_err_no_mmu_notifier;
@@ -720,19 +740,19 @@ out_err:
         mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
 #endif
 out_err_no_mmu_notifier:
-        cleanup_srcu_struct(&kvm->irq_srcu);
-out_err_no_irq_srcu:
-        cleanup_srcu_struct(&kvm->srcu);
-out_err_no_srcu:
         hardware_disable_all();
 out_err_no_disable:
         kvm_arch_destroy_vm(kvm);
-        WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
 out_err_no_arch_destroy_vm:
+        WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
         for (i = 0; i < KVM_NR_BUSES; i++)
                 kfree(kvm_get_bus(kvm, i));
         for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
                 kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
+        cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
+        cleanup_srcu_struct(&kvm->srcu);
+out_err_no_srcu:
         kvm_arch_free_vm(kvm);
         mmdrop(current->mm);
         return ERR_PTR(r);
@@ -1886,7 +1906,7 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);

 void kvm_set_pfn_dirty(kvm_pfn_t pfn)
 {
-        if (!kvm_is_reserved_pfn(pfn)) {
+        if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
                 struct page *page = pfn_to_page(pfn);

                 SetPageDirty(page);
@@ -1896,7 +1916,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);

 void kvm_set_pfn_accessed(kvm_pfn_t pfn)
 {
-        if (!kvm_is_reserved_pfn(pfn))
+        if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
                 mark_page_accessed(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
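
Finally, the kvm_create_vm() hunks move the SRCU and users_count initialization ahead of the first possible failure and resequence the error ladder so each label releases exactly what was acquired before the jump, in reverse order; that is what fixes the NULL-pointer dereference and the bogus refcount WARN on the failure path. The corrected shape, reduced to a self-contained skeleton (stand-in names, not kernel code):

    #include <stdio.h>

    /* Stubs standing in for srcu init, refcount init, arch init, ... */
    static int init_a(void)  { return 0; }
    static int init_b(void)  { return 0; }
    static int init_c(void)  { return -1; }     /* force a failure */
    static void undo_b(void) { puts("undo b"); }
    static void undo_a(void) { puts("undo a"); }

    /* Skeleton of the corrected ordering: resources are acquired
     * top-down and the error ladder releases them bottom-up, so a
     * failure at any step unwinds exactly what succeeded before it. */
    static int create(void)
    {
            if (init_a())
                    goto out_no_a;
            if (init_b())
                    goto out_no_b;
            if (init_c())
                    goto out_no_c;
            return 0;

    out_no_c:
            undo_b();
    out_no_b:
            undo_a();
    out_no_a:
            return -1;
    }

    int main(void) { return create() ? 1 : 0; }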