aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarcelo Tosatti <mtosatti@redhat.com>2007-12-20 19:18:22 -0500
committerAvi Kivity <avi@qumranet.com>2008-01-30 11:01:20 -0500
commit10589a4699bb978c781ce73bbae8ca942c5250c9 (patch)
tree5585ed87fff0a2ba259fcc6f998022481da75f68
parent774ead3ad9bcbc05ef6aaebb9bdf8b4c3126923b (diff)
KVM: MMU: Concurrent guest walkers
Do not hold kvm->lock mutex across the entire pagefault code, only acquire it in places where it is necessary, such as mmu hash list, active list, rmap and parent pte handling. Allow concurrent guest walkers by switching walk_addr() to use mmap_sem in read-mode. And get rid of the lockless __gfn_to_page. [avi: move kvm_mmu_pte_write() locking inside the function] [avi: add locking for real mode] [avi: fix cmpxchg locking] Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r--arch/x86/kvm/mmu.c41
-rw-r--r--arch/x86/kvm/paging_tmpl.h8
-rw-r--r--arch/x86/kvm/vmx.c25
-rw-r--r--arch/x86/kvm/x86.c92
-rw-r--r--virt/kvm/kvm_main.c22
5 files changed, 117 insertions, 71 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8f12ec52ad86..3b91227969a5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -974,7 +974,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
974{ 974{
975} 975}
976 976
977static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) 977static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
978{ 978{
979 int level = PT32E_ROOT_LEVEL; 979 int level = PT32E_ROOT_LEVEL;
980 hpa_t table_addr = vcpu->arch.mmu.root_hpa; 980 hpa_t table_addr = vcpu->arch.mmu.root_hpa;
@@ -1015,6 +1015,17 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1015 } 1015 }
1016} 1016}
1017 1017
1018static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1019{
1020 int r;
1021
1022 mutex_lock(&vcpu->kvm->lock);
1023 r = __nonpaging_map(vcpu, v, write, gfn);
1024 mutex_unlock(&vcpu->kvm->lock);
1025 return r;
1026}
1027
1028
1018static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, 1029static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
1019 struct kvm_mmu_page *sp) 1030 struct kvm_mmu_page *sp)
1020{ 1031{
@@ -1031,6 +1042,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
1031 1042
1032 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) 1043 if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
1033 return; 1044 return;
1045 mutex_lock(&vcpu->kvm->lock);
1034#ifdef CONFIG_X86_64 1046#ifdef CONFIG_X86_64
1035 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { 1047 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
1036 hpa_t root = vcpu->arch.mmu.root_hpa; 1048 hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -1038,6 +1050,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
1038 sp = page_header(root); 1050 sp = page_header(root);
1039 --sp->root_count; 1051 --sp->root_count;
1040 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 1052 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
1053 mutex_unlock(&vcpu->kvm->lock);
1041 return; 1054 return;
1042 } 1055 }
1043#endif 1056#endif
@@ -1051,6 +1064,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
1051 } 1064 }
1052 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; 1065 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
1053 } 1066 }
1067 mutex_unlock(&vcpu->kvm->lock);
1054 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 1068 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
1055} 1069}
1056 1070
@@ -1250,15 +1264,15 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
1250{ 1264{
1251 int r; 1265 int r;
1252 1266
1253 mutex_lock(&vcpu->kvm->lock);
1254 r = mmu_topup_memory_caches(vcpu); 1267 r = mmu_topup_memory_caches(vcpu);
1255 if (r) 1268 if (r)
1256 goto out; 1269 goto out;
1270 mutex_lock(&vcpu->kvm->lock);
1257 mmu_alloc_roots(vcpu); 1271 mmu_alloc_roots(vcpu);
1272 mutex_unlock(&vcpu->kvm->lock);
1258 kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); 1273 kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
1259 kvm_mmu_flush_tlb(vcpu); 1274 kvm_mmu_flush_tlb(vcpu);
1260out: 1275out:
1261 mutex_unlock(&vcpu->kvm->lock);
1262 return r; 1276 return r;
1263} 1277}
1264EXPORT_SYMBOL_GPL(kvm_mmu_load); 1278EXPORT_SYMBOL_GPL(kvm_mmu_load);
@@ -1353,6 +1367,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1353 int npte; 1367 int npte;
1354 1368
1355 pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); 1369 pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
1370 mutex_lock(&vcpu->kvm->lock);
1356 ++vcpu->kvm->stat.mmu_pte_write; 1371 ++vcpu->kvm->stat.mmu_pte_write;
1357 kvm_mmu_audit(vcpu, "pre pte write"); 1372 kvm_mmu_audit(vcpu, "pre pte write");
1358 if (gfn == vcpu->arch.last_pt_write_gfn 1373 if (gfn == vcpu->arch.last_pt_write_gfn
@@ -1421,17 +1436,27 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1421 } 1436 }
1422 } 1437 }
1423 kvm_mmu_audit(vcpu, "post pte write"); 1438 kvm_mmu_audit(vcpu, "post pte write");
1439 mutex_unlock(&vcpu->kvm->lock);
1424} 1440}
1425 1441
1426int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) 1442int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
1427{ 1443{
1428 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); 1444 gpa_t gpa;
1445 int r;
1429 1446
1430 return kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); 1447 down_read(&current->mm->mmap_sem);
1448 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
1449 up_read(&current->mm->mmap_sem);
1450
1451 mutex_lock(&vcpu->kvm->lock);
1452 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
1453 mutex_unlock(&vcpu->kvm->lock);
1454 return r;
1431} 1455}
1432 1456
1433void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) 1457void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
1434{ 1458{
1459 mutex_lock(&vcpu->kvm->lock);
1435 while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) { 1460 while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) {
1436 struct kvm_mmu_page *sp; 1461 struct kvm_mmu_page *sp;
1437 1462
@@ -1440,6 +1465,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
1440 kvm_mmu_zap_page(vcpu->kvm, sp); 1465 kvm_mmu_zap_page(vcpu->kvm, sp);
1441 ++vcpu->kvm->stat.mmu_recycled; 1466 ++vcpu->kvm->stat.mmu_recycled;
1442 } 1467 }
1468 mutex_unlock(&vcpu->kvm->lock);
1443} 1469}
1444 1470
1445int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) 1471int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
@@ -1447,7 +1473,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
1447 int r; 1473 int r;
1448 enum emulation_result er; 1474 enum emulation_result er;
1449 1475
1450 mutex_lock(&vcpu->kvm->lock);
1451 r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code); 1476 r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
1452 if (r < 0) 1477 if (r < 0)
1453 goto out; 1478 goto out;
@@ -1462,7 +1487,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
1462 goto out; 1487 goto out;
1463 1488
1464 er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0); 1489 er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
1465 mutex_unlock(&vcpu->kvm->lock);
1466 1490
1467 switch (er) { 1491 switch (er) {
1468 case EMULATE_DONE: 1492 case EMULATE_DONE:
@@ -1477,7 +1501,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
1477 BUG(); 1501 BUG();
1478 } 1502 }
1479out: 1503out:
1480 mutex_unlock(&vcpu->kvm->lock);
1481 return r; 1504 return r;
1482} 1505}
1483EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); 1506EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
@@ -1574,8 +1597,10 @@ void kvm_mmu_zap_all(struct kvm *kvm)
1574{ 1597{
1575 struct kvm_mmu_page *sp, *node; 1598 struct kvm_mmu_page *sp, *node;
1576 1599
1600 mutex_lock(&kvm->lock);
1577 list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) 1601 list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
1578 kvm_mmu_zap_page(kvm, sp); 1602 kvm_mmu_zap_page(kvm, sp);
1603 mutex_unlock(&kvm->lock);
1579 1604
1580 kvm_flush_remote_tlbs(kvm); 1605 kvm_flush_remote_tlbs(kvm);
1581} 1606}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 56b88f7e83ef..7f83f5557d5e 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -368,11 +368,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
368 if (r) 368 if (r)
369 return r; 369 return r;
370 370
371 down_read(&current->mm->mmap_sem);
371 /* 372 /*
372 * Look up the shadow pte for the faulting address. 373 * Look up the shadow pte for the faulting address.
373 */ 374 */
374 r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, 375 r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
375 fetch_fault); 376 fetch_fault);
377 up_read(&current->mm->mmap_sem);
376 378
377 /* 379 /*
378 * The page is not mapped by the guest. Let the guest handle it. 380 * The page is not mapped by the guest. Let the guest handle it.
@@ -384,6 +386,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
384 return 0; 386 return 0;
385 } 387 }
386 388
389 mutex_lock(&vcpu->kvm->lock);
387 shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, 390 shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
388 &write_pt); 391 &write_pt);
389 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, 392 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
@@ -395,11 +398,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
395 /* 398 /*
396 * mmio: emulate if accessible, otherwise its a guest fault. 399 * mmio: emulate if accessible, otherwise its a guest fault.
397 */ 400 */
398 if (shadow_pte && is_io_pte(*shadow_pte)) 401 if (shadow_pte && is_io_pte(*shadow_pte)) {
402 mutex_unlock(&vcpu->kvm->lock);
399 return 1; 403 return 1;
404 }
400 405
401 ++vcpu->stat.pf_fixed; 406 ++vcpu->stat.pf_fixed;
402 kvm_mmu_audit(vcpu, "post page fault (fixed)"); 407 kvm_mmu_audit(vcpu, "post page fault (fixed)");
408 mutex_unlock(&vcpu->kvm->lock);
403 409
404 return write_pt; 410 return write_pt;
405} 411}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d1167fc303d6..c39493feba46 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1432,27 +1432,34 @@ static int init_rmode_tss(struct kvm *kvm)
1432{ 1432{
1433 gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; 1433 gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
1434 u16 data = 0; 1434 u16 data = 0;
1435 int ret = 0;
1435 int r; 1436 int r;
1436 1437
1438 down_read(&current->mm->mmap_sem);
1437 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); 1439 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
1438 if (r < 0) 1440 if (r < 0)
1439 return 0; 1441 goto out;
1440 data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; 1442 data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
1441 r = kvm_write_guest_page(kvm, fn++, &data, 0x66, sizeof(u16)); 1443 r = kvm_write_guest_page(kvm, fn++, &data, 0x66, sizeof(u16));
1442 if (r < 0) 1444 if (r < 0)
1443 return 0; 1445 goto out;
1444 r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); 1446 r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
1445 if (r < 0) 1447 if (r < 0)
1446 return 0; 1448 goto out;
1447 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); 1449 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
1448 if (r < 0) 1450 if (r < 0)
1449 return 0; 1451 goto out;
1450 data = ~0; 1452 data = ~0;
1451 r = kvm_write_guest_page(kvm, fn, &data, RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, 1453 r = kvm_write_guest_page(kvm, fn, &data,
1452 sizeof(u8)); 1454 RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
1455 sizeof(u8));
1453 if (r < 0) 1456 if (r < 0)
1454 return 0; 1457 goto out;
1455 return 1; 1458
1459 ret = 1;
1460out:
1461 up_read(&current->mm->mmap_sem);
1462 return ret;
1456} 1463}
1457 1464
1458static void seg_setup(int seg) 1465static void seg_setup(int seg)
@@ -1471,6 +1478,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
1471 int r = 0; 1478 int r = 0;
1472 1479
1473 mutex_lock(&kvm->lock); 1480 mutex_lock(&kvm->lock);
1481 down_write(&current->mm->mmap_sem);
1474 if (kvm->arch.apic_access_page) 1482 if (kvm->arch.apic_access_page)
1475 goto out; 1483 goto out;
1476 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; 1484 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -1482,6 +1490,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
1482 goto out; 1490 goto out;
1483 kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); 1491 kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
1484out: 1492out:
1493 up_write(&current->mm->mmap_sem);
1485 mutex_unlock(&kvm->lock); 1494 mutex_unlock(&kvm->lock);
1486 return r; 1495 return r;
1487} 1496}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1f48ec871035..e3b3141db13c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -181,7 +181,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
181 int ret; 181 int ret;
182 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; 182 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
183 183
184 mutex_lock(&vcpu->kvm->lock); 184 down_read(&current->mm->mmap_sem);
185 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, 185 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
186 offset * sizeof(u64), sizeof(pdpte)); 186 offset * sizeof(u64), sizeof(pdpte));
187 if (ret < 0) { 187 if (ret < 0) {
@@ -198,7 +198,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
198 198
199 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); 199 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
200out: 200out:
201 mutex_unlock(&vcpu->kvm->lock); 201 up_read(&current->mm->mmap_sem);
202 202
203 return ret; 203 return ret;
204} 204}
@@ -212,13 +212,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
212 if (is_long_mode(vcpu) || !is_pae(vcpu)) 212 if (is_long_mode(vcpu) || !is_pae(vcpu))
213 return false; 213 return false;
214 214
215 mutex_lock(&vcpu->kvm->lock); 215 down_read(&current->mm->mmap_sem);
216 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); 216 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
217 if (r < 0) 217 if (r < 0)
218 goto out; 218 goto out;
219 changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; 219 changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
220out: 220out:
221 mutex_unlock(&vcpu->kvm->lock); 221 up_read(&current->mm->mmap_sem);
222 222
223 return changed; 223 return changed;
224} 224}
@@ -278,9 +278,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
278 kvm_x86_ops->set_cr0(vcpu, cr0); 278 kvm_x86_ops->set_cr0(vcpu, cr0);
279 vcpu->arch.cr0 = cr0; 279 vcpu->arch.cr0 = cr0;
280 280
281 mutex_lock(&vcpu->kvm->lock);
282 kvm_mmu_reset_context(vcpu); 281 kvm_mmu_reset_context(vcpu);
283 mutex_unlock(&vcpu->kvm->lock);
284 return; 282 return;
285} 283}
286EXPORT_SYMBOL_GPL(set_cr0); 284EXPORT_SYMBOL_GPL(set_cr0);
@@ -320,9 +318,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
320 } 318 }
321 kvm_x86_ops->set_cr4(vcpu, cr4); 319 kvm_x86_ops->set_cr4(vcpu, cr4);
322 vcpu->arch.cr4 = cr4; 320 vcpu->arch.cr4 = cr4;
323 mutex_lock(&vcpu->kvm->lock);
324 kvm_mmu_reset_context(vcpu); 321 kvm_mmu_reset_context(vcpu);
325 mutex_unlock(&vcpu->kvm->lock);
326} 322}
327EXPORT_SYMBOL_GPL(set_cr4); 323EXPORT_SYMBOL_GPL(set_cr4);
328 324
@@ -360,7 +356,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
360 */ 356 */
361 } 357 }
362 358
363 mutex_lock(&vcpu->kvm->lock); 359 down_read(&current->mm->mmap_sem);
364 /* 360 /*
365 * Does the new cr3 value map to physical memory? (Note, we 361 * Does the new cr3 value map to physical memory? (Note, we
366 * catch an invalid cr3 even in real-mode, because it would 362 * catch an invalid cr3 even in real-mode, because it would
@@ -376,7 +372,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
376 vcpu->arch.cr3 = cr3; 372 vcpu->arch.cr3 = cr3;
377 vcpu->arch.mmu.new_cr3(vcpu); 373 vcpu->arch.mmu.new_cr3(vcpu);
378 } 374 }
379 mutex_unlock(&vcpu->kvm->lock); 375 up_read(&current->mm->mmap_sem);
380} 376}
381EXPORT_SYMBOL_GPL(set_cr3); 377EXPORT_SYMBOL_GPL(set_cr3);
382 378
@@ -1211,12 +1207,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
1211 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) 1207 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
1212 return -EINVAL; 1208 return -EINVAL;
1213 1209
1214 mutex_lock(&kvm->lock); 1210 down_write(&current->mm->mmap_sem);
1215 1211
1216 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); 1212 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
1217 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; 1213 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
1218 1214
1219 mutex_unlock(&kvm->lock); 1215 up_write(&current->mm->mmap_sem);
1220 return 0; 1216 return 0;
1221} 1217}
1222 1218
@@ -1265,7 +1261,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
1265 < alias->target_phys_addr) 1261 < alias->target_phys_addr)
1266 goto out; 1262 goto out;
1267 1263
1268 mutex_lock(&kvm->lock); 1264 down_write(&current->mm->mmap_sem);
1269 1265
1270 p = &kvm->arch.aliases[alias->slot]; 1266 p = &kvm->arch.aliases[alias->slot];
1271 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 1267 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1279,7 +1275,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
1279 1275
1280 kvm_mmu_zap_all(kvm); 1276 kvm_mmu_zap_all(kvm);
1281 1277
1282 mutex_unlock(&kvm->lock); 1278 up_write(&current->mm->mmap_sem);
1283 1279
1284 return 0; 1280 return 0;
1285 1281
@@ -1355,7 +1351,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1355 struct kvm_memory_slot *memslot; 1351 struct kvm_memory_slot *memslot;
1356 int is_dirty = 0; 1352 int is_dirty = 0;
1357 1353
1358 mutex_lock(&kvm->lock); 1354 down_write(&current->mm->mmap_sem);
1359 1355
1360 r = kvm_get_dirty_log(kvm, log, &is_dirty); 1356 r = kvm_get_dirty_log(kvm, log, &is_dirty);
1361 if (r) 1357 if (r)
@@ -1371,7 +1367,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1371 } 1367 }
1372 r = 0; 1368 r = 0;
1373out: 1369out:
1374 mutex_unlock(&kvm->lock); 1370 up_write(&current->mm->mmap_sem);
1375 return r; 1371 return r;
1376} 1372}
1377 1373
@@ -1565,25 +1561,32 @@ int emulator_read_std(unsigned long addr,
1565 struct kvm_vcpu *vcpu) 1561 struct kvm_vcpu *vcpu)
1566{ 1562{
1567 void *data = val; 1563 void *data = val;
1564 int r = X86EMUL_CONTINUE;
1568 1565
1566 down_read(&current->mm->mmap_sem);
1569 while (bytes) { 1567 while (bytes) {
1570 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1568 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1571 unsigned offset = addr & (PAGE_SIZE-1); 1569 unsigned offset = addr & (PAGE_SIZE-1);
1572 unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); 1570 unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
1573 int ret; 1571 int ret;
1574 1572
1575 if (gpa == UNMAPPED_GVA) 1573 if (gpa == UNMAPPED_GVA) {
1576 return X86EMUL_PROPAGATE_FAULT; 1574 r = X86EMUL_PROPAGATE_FAULT;
1575 goto out;
1576 }
1577 ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy); 1577 ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
1578 if (ret < 0) 1578 if (ret < 0) {
1579 return X86EMUL_UNHANDLEABLE; 1579 r = X86EMUL_UNHANDLEABLE;
1580 goto out;
1581 }
1580 1582
1581 bytes -= tocopy; 1583 bytes -= tocopy;
1582 data += tocopy; 1584 data += tocopy;
1583 addr += tocopy; 1585 addr += tocopy;
1584 } 1586 }
1585 1587out:
1586 return X86EMUL_CONTINUE; 1588 up_read(&current->mm->mmap_sem);
1589 return r;
1587} 1590}
1588EXPORT_SYMBOL_GPL(emulator_read_std); 1591EXPORT_SYMBOL_GPL(emulator_read_std);
1589 1592
@@ -1601,7 +1604,9 @@ static int emulator_read_emulated(unsigned long addr,
1601 return X86EMUL_CONTINUE; 1604 return X86EMUL_CONTINUE;
1602 } 1605 }
1603 1606
1607 down_read(&current->mm->mmap_sem);
1604 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1608 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1609 up_read(&current->mm->mmap_sem);
1605 1610
1606 /* For APIC access vmexit */ 1611 /* For APIC access vmexit */
1607 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 1612 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -1617,11 +1622,14 @@ mmio:
1617 /* 1622 /*
1618 * Is this MMIO handled locally? 1623 * Is this MMIO handled locally?
1619 */ 1624 */
1625 mutex_lock(&vcpu->kvm->lock);
1620 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); 1626 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
1621 if (mmio_dev) { 1627 if (mmio_dev) {
1622 kvm_iodevice_read(mmio_dev, gpa, bytes, val); 1628 kvm_iodevice_read(mmio_dev, gpa, bytes, val);
1629 mutex_unlock(&vcpu->kvm->lock);
1623 return X86EMUL_CONTINUE; 1630 return X86EMUL_CONTINUE;
1624 } 1631 }
1632 mutex_unlock(&vcpu->kvm->lock);
1625 1633
1626 vcpu->mmio_needed = 1; 1634 vcpu->mmio_needed = 1;
1627 vcpu->mmio_phys_addr = gpa; 1635 vcpu->mmio_phys_addr = gpa;
@@ -1636,10 +1644,14 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1636{ 1644{
1637 int ret; 1645 int ret;
1638 1646
1647 down_read(&current->mm->mmap_sem);
1639 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); 1648 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
1640 if (ret < 0) 1649 if (ret < 0) {
1650 up_read(&current->mm->mmap_sem);
1641 return 0; 1651 return 0;
1652 }
1642 kvm_mmu_pte_write(vcpu, gpa, val, bytes); 1653 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
1654 up_read(&current->mm->mmap_sem);
1643 return 1; 1655 return 1;
1644} 1656}
1645 1657
@@ -1649,7 +1661,11 @@ static int emulator_write_emulated_onepage(unsigned long addr,
1649 struct kvm_vcpu *vcpu) 1661 struct kvm_vcpu *vcpu)
1650{ 1662{
1651 struct kvm_io_device *mmio_dev; 1663 struct kvm_io_device *mmio_dev;
1652 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1664 gpa_t gpa;
1665
1666 down_read(&current->mm->mmap_sem);
1667 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1668 up_read(&current->mm->mmap_sem);
1653 1669
1654 if (gpa == UNMAPPED_GVA) { 1670 if (gpa == UNMAPPED_GVA) {
1655 kvm_inject_page_fault(vcpu, addr, 2); 1671 kvm_inject_page_fault(vcpu, addr, 2);
@@ -1667,11 +1683,14 @@ mmio:
1667 /* 1683 /*
1668 * Is this MMIO handled locally? 1684 * Is this MMIO handled locally?
1669 */ 1685 */
1686 mutex_lock(&vcpu->kvm->lock);
1670 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); 1687 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
1671 if (mmio_dev) { 1688 if (mmio_dev) {
1672 kvm_iodevice_write(mmio_dev, gpa, bytes, val); 1689 kvm_iodevice_write(mmio_dev, gpa, bytes, val);
1690 mutex_unlock(&vcpu->kvm->lock);
1673 return X86EMUL_CONTINUE; 1691 return X86EMUL_CONTINUE;
1674 } 1692 }
1693 mutex_unlock(&vcpu->kvm->lock);
1675 1694
1676 vcpu->mmio_needed = 1; 1695 vcpu->mmio_needed = 1;
1677 vcpu->mmio_phys_addr = gpa; 1696 vcpu->mmio_phys_addr = gpa;
@@ -1718,11 +1737,14 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
1718#ifndef CONFIG_X86_64 1737#ifndef CONFIG_X86_64
1719 /* guests cmpxchg8b have to be emulated atomically */ 1738 /* guests cmpxchg8b have to be emulated atomically */
1720 if (bytes == 8) { 1739 if (bytes == 8) {
1721 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 1740 gpa_t gpa;
1722 struct page *page; 1741 struct page *page;
1723 char *addr; 1742 char *addr;
1724 u64 val; 1743 u64 val;
1725 1744
1745 down_read(&current->mm->mmap_sem);
1746 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
1747
1726 if (gpa == UNMAPPED_GVA || 1748 if (gpa == UNMAPPED_GVA ||
1727 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 1749 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
1728 goto emul_write; 1750 goto emul_write;
@@ -1736,8 +1758,9 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
1736 set_64bit((u64 *)(addr + offset_in_page(gpa)), val); 1758 set_64bit((u64 *)(addr + offset_in_page(gpa)), val);
1737 kunmap_atomic(addr, KM_USER0); 1759 kunmap_atomic(addr, KM_USER0);
1738 kvm_release_page_dirty(page); 1760 kvm_release_page_dirty(page);
1761 emul_write:
1762 up_read(&current->mm->mmap_sem);
1739 } 1763 }
1740emul_write:
1741#endif 1764#endif
1742 1765
1743 return emulator_write_emulated(addr, new, bytes, vcpu); 1766 return emulator_write_emulated(addr, new, bytes, vcpu);
@@ -2118,10 +2141,10 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2118 kvm_x86_ops->skip_emulated_instruction(vcpu); 2141 kvm_x86_ops->skip_emulated_instruction(vcpu);
2119 2142
2120 for (i = 0; i < nr_pages; ++i) { 2143 for (i = 0; i < nr_pages; ++i) {
2121 mutex_lock(&vcpu->kvm->lock); 2144 down_read(&current->mm->mmap_sem);
2122 page = gva_to_page(vcpu, address + i * PAGE_SIZE); 2145 page = gva_to_page(vcpu, address + i * PAGE_SIZE);
2123 vcpu->arch.pio.guest_pages[i] = page; 2146 vcpu->arch.pio.guest_pages[i] = page;
2124 mutex_unlock(&vcpu->kvm->lock); 2147 up_read(&current->mm->mmap_sem);
2125 if (!page) { 2148 if (!page) {
2126 kvm_inject_gp(vcpu, 0); 2149 kvm_inject_gp(vcpu, 0);
2127 free_pio_guest_pages(vcpu); 2150 free_pio_guest_pages(vcpu);
@@ -2247,7 +2270,6 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
2247 char instruction[3]; 2270 char instruction[3];
2248 int ret = 0; 2271 int ret = 0;
2249 2272
2250 mutex_lock(&vcpu->kvm->lock);
2251 2273
2252 /* 2274 /*
2253 * Blow out the MMU to ensure that no other VCPU has an active mapping 2275 * Blow out the MMU to ensure that no other VCPU has an active mapping
@@ -2262,8 +2284,6 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
2262 != X86EMUL_CONTINUE) 2284 != X86EMUL_CONTINUE)
2263 ret = -EFAULT; 2285 ret = -EFAULT;
2264 2286
2265 mutex_unlock(&vcpu->kvm->lock);
2266
2267 return ret; 2287 return ret;
2268} 2288}
2269 2289
@@ -2447,8 +2467,10 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
2447 if (!apic || !apic->vapic_addr) 2467 if (!apic || !apic->vapic_addr)
2448 return; 2468 return;
2449 2469
2470 down_read(&current->mm->mmap_sem);
2450 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 2471 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
2451 vcpu->arch.apic->vapic_page = page; 2472 vcpu->arch.apic->vapic_page = page;
2473 up_read(&current->mm->mmap_sem);
2452} 2474}
2453 2475
2454static void vapic_exit(struct kvm_vcpu *vcpu) 2476static void vapic_exit(struct kvm_vcpu *vcpu)
@@ -2910,13 +2932,13 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2910 gpa_t gpa; 2932 gpa_t gpa;
2911 2933
2912 vcpu_load(vcpu); 2934 vcpu_load(vcpu);
2913 mutex_lock(&vcpu->kvm->lock); 2935 down_read(&current->mm->mmap_sem);
2914 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); 2936 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
2937 up_read(&current->mm->mmap_sem);
2915 tr->physical_address = gpa; 2938 tr->physical_address = gpa;
2916 tr->valid = gpa != UNMAPPED_GVA; 2939 tr->valid = gpa != UNMAPPED_GVA;
2917 tr->writeable = 1; 2940 tr->writeable = 1;
2918 tr->usermode = 0; 2941 tr->usermode = 0;
2919 mutex_unlock(&vcpu->kvm->lock);
2920 vcpu_put(vcpu); 2942 vcpu_put(vcpu);
2921 2943
2922 return 0; 2944 return 0;
@@ -3185,13 +3207,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
3185 */ 3207 */
3186 if (!user_alloc) { 3208 if (!user_alloc) {
3187 if (npages && !old.rmap) { 3209 if (npages && !old.rmap) {
3188 down_write(&current->mm->mmap_sem);
3189 memslot->userspace_addr = do_mmap(NULL, 0, 3210 memslot->userspace_addr = do_mmap(NULL, 0,
3190 npages * PAGE_SIZE, 3211 npages * PAGE_SIZE,
3191 PROT_READ | PROT_WRITE, 3212 PROT_READ | PROT_WRITE,
3192 MAP_SHARED | MAP_ANONYMOUS, 3213 MAP_SHARED | MAP_ANONYMOUS,
3193 0); 3214 0);
3194 up_write(&current->mm->mmap_sem);
3195 3215
3196 if (IS_ERR((void *)memslot->userspace_addr)) 3216 if (IS_ERR((void *)memslot->userspace_addr))
3197 return PTR_ERR((void *)memslot->userspace_addr); 3217 return PTR_ERR((void *)memslot->userspace_addr);
@@ -3199,10 +3219,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
3199 if (!old.user_alloc && old.rmap) { 3219 if (!old.user_alloc && old.rmap) {
3200 int ret; 3220 int ret;
3201 3221
3202 down_write(&current->mm->mmap_sem);
3203 ret = do_munmap(current->mm, old.userspace_addr, 3222 ret = do_munmap(current->mm, old.userspace_addr,
3204 old.npages * PAGE_SIZE); 3223 old.npages * PAGE_SIZE);
3205 up_write(&current->mm->mmap_sem);
3206 if (ret < 0) 3224 if (ret < 0)
3207 printk(KERN_WARNING 3225 printk(KERN_WARNING
3208 "kvm_vm_ioctl_set_memory_region: " 3226 "kvm_vm_ioctl_set_memory_region: "
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4026d7d64296..678e80561b74 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -227,7 +227,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
227 * 227 *
228 * Discontiguous memory is allowed, mostly for framebuffers. 228 * Discontiguous memory is allowed, mostly for framebuffers.
229 * 229 *
230 * Must be called holding kvm->lock. 230 * Must be called holding mmap_sem for write.
231 */ 231 */
232int __kvm_set_memory_region(struct kvm *kvm, 232int __kvm_set_memory_region(struct kvm *kvm,
233 struct kvm_userspace_memory_region *mem, 233 struct kvm_userspace_memory_region *mem,
@@ -338,9 +338,9 @@ int kvm_set_memory_region(struct kvm *kvm,
338{ 338{
339 int r; 339 int r;
340 340
341 mutex_lock(&kvm->lock); 341 down_write(&current->mm->mmap_sem);
342 r = __kvm_set_memory_region(kvm, mem, user_alloc); 342 r = __kvm_set_memory_region(kvm, mem, user_alloc);
343 mutex_unlock(&kvm->lock); 343 up_write(&current->mm->mmap_sem);
344 return r; 344 return r;
345} 345}
346EXPORT_SYMBOL_GPL(kvm_set_memory_region); 346EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@@ -456,7 +456,7 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
456/* 456/*
457 * Requires current->mm->mmap_sem to be held 457 * Requires current->mm->mmap_sem to be held
458 */ 458 */
459static struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn) 459struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
460{ 460{
461 struct page *page[1]; 461 struct page *page[1];
462 unsigned long addr; 462 unsigned long addr;
@@ -481,17 +481,6 @@ static struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn)
481 return page[0]; 481 return page[0];
482} 482}
483 483
484struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
485{
486 struct page *page;
487
488 down_read(&current->mm->mmap_sem);
489 page = __gfn_to_page(kvm, gfn);
490 up_read(&current->mm->mmap_sem);
491
492 return page;
493}
494
495EXPORT_SYMBOL_GPL(gfn_to_page); 484EXPORT_SYMBOL_GPL(gfn_to_page);
496 485
497void kvm_release_page_clean(struct page *page) 486void kvm_release_page_clean(struct page *page)
@@ -977,8 +966,7 @@ static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
977 966
978 if (!kvm_is_visible_gfn(kvm, vmf->pgoff)) 967 if (!kvm_is_visible_gfn(kvm, vmf->pgoff))
979 return VM_FAULT_SIGBUS; 968 return VM_FAULT_SIGBUS;
980 /* current->mm->mmap_sem is already held so call lockless version */ 969 page = gfn_to_page(kvm, vmf->pgoff);
981 page = __gfn_to_page(kvm, vmf->pgoff);
982 if (is_error_page(page)) { 970 if (is_error_page(page)) {
983 kvm_release_page_clean(page); 971 kvm_release_page_clean(page);
984 return VM_FAULT_SIGBUS; 972 return VM_FAULT_SIGBUS;