diff options
author | Marcelo Tosatti <mtosatti@redhat.com> | 2007-12-20 19:18:22 -0500 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2008-01-30 11:01:20 -0500 |
commit | 10589a4699bb978c781ce73bbae8ca942c5250c9 (patch) | |
tree | 5585ed87fff0a2ba259fcc6f998022481da75f68 /arch | |
parent | 774ead3ad9bcbc05ef6aaebb9bdf8b4c3126923b (diff) |
KVM: MMU: Concurrent guest walkers
Do not hold kvm->lock mutex across the entire pagefault code,
only acquire it in places where it is necessary, such as mmu
hash list, active list, rmap and parent pte handling.
Allow concurrent guest walkers by switching walk_addr() to use
mmap_sem in read-mode.
And get rid of the lockless __gfn_to_page.
[avi: move kvm_mmu_pte_write() locking inside the function]
[avi: add locking for real mode]
[avi: fix cmpxchg locking]
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/kvm/mmu.c | 41 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 8 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 25 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 92 |
4 files changed, 112 insertions, 54 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8f12ec52ad86..3b91227969a5 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -974,7 +974,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | |||
974 | { | 974 | { |
975 | } | 975 | } |
976 | 976 | ||
977 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | 977 | static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) |
978 | { | 978 | { |
979 | int level = PT32E_ROOT_LEVEL; | 979 | int level = PT32E_ROOT_LEVEL; |
980 | hpa_t table_addr = vcpu->arch.mmu.root_hpa; | 980 | hpa_t table_addr = vcpu->arch.mmu.root_hpa; |
@@ -1015,6 +1015,17 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1015 | } | 1015 | } |
1016 | } | 1016 | } |
1017 | 1017 | ||
1018 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | ||
1019 | { | ||
1020 | int r; | ||
1021 | |||
1022 | mutex_lock(&vcpu->kvm->lock); | ||
1023 | r = __nonpaging_map(vcpu, v, write, gfn); | ||
1024 | mutex_unlock(&vcpu->kvm->lock); | ||
1025 | return r; | ||
1026 | } | ||
1027 | |||
1028 | |||
1018 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | 1029 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, |
1019 | struct kvm_mmu_page *sp) | 1030 | struct kvm_mmu_page *sp) |
1020 | { | 1031 | { |
@@ -1031,6 +1042,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
1031 | 1042 | ||
1032 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 1043 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
1033 | return; | 1044 | return; |
1045 | mutex_lock(&vcpu->kvm->lock); | ||
1034 | #ifdef CONFIG_X86_64 | 1046 | #ifdef CONFIG_X86_64 |
1035 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 1047 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
1036 | hpa_t root = vcpu->arch.mmu.root_hpa; | 1048 | hpa_t root = vcpu->arch.mmu.root_hpa; |
@@ -1038,6 +1050,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
1038 | sp = page_header(root); | 1050 | sp = page_header(root); |
1039 | --sp->root_count; | 1051 | --sp->root_count; |
1040 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 1052 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
1053 | mutex_unlock(&vcpu->kvm->lock); | ||
1041 | return; | 1054 | return; |
1042 | } | 1055 | } |
1043 | #endif | 1056 | #endif |
@@ -1051,6 +1064,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
1051 | } | 1064 | } |
1052 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; | 1065 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; |
1053 | } | 1066 | } |
1067 | mutex_unlock(&vcpu->kvm->lock); | ||
1054 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 1068 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
1055 | } | 1069 | } |
1056 | 1070 | ||
@@ -1250,15 +1264,15 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
1250 | { | 1264 | { |
1251 | int r; | 1265 | int r; |
1252 | 1266 | ||
1253 | mutex_lock(&vcpu->kvm->lock); | ||
1254 | r = mmu_topup_memory_caches(vcpu); | 1267 | r = mmu_topup_memory_caches(vcpu); |
1255 | if (r) | 1268 | if (r) |
1256 | goto out; | 1269 | goto out; |
1270 | mutex_lock(&vcpu->kvm->lock); | ||
1257 | mmu_alloc_roots(vcpu); | 1271 | mmu_alloc_roots(vcpu); |
1272 | mutex_unlock(&vcpu->kvm->lock); | ||
1258 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); | 1273 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); |
1259 | kvm_mmu_flush_tlb(vcpu); | 1274 | kvm_mmu_flush_tlb(vcpu); |
1260 | out: | 1275 | out: |
1261 | mutex_unlock(&vcpu->kvm->lock); | ||
1262 | return r; | 1276 | return r; |
1263 | } | 1277 | } |
1264 | EXPORT_SYMBOL_GPL(kvm_mmu_load); | 1278 | EXPORT_SYMBOL_GPL(kvm_mmu_load); |
@@ -1353,6 +1367,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1353 | int npte; | 1367 | int npte; |
1354 | 1368 | ||
1355 | pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); | 1369 | pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); |
1370 | mutex_lock(&vcpu->kvm->lock); | ||
1356 | ++vcpu->kvm->stat.mmu_pte_write; | 1371 | ++vcpu->kvm->stat.mmu_pte_write; |
1357 | kvm_mmu_audit(vcpu, "pre pte write"); | 1372 | kvm_mmu_audit(vcpu, "pre pte write"); |
1358 | if (gfn == vcpu->arch.last_pt_write_gfn | 1373 | if (gfn == vcpu->arch.last_pt_write_gfn |
@@ -1421,17 +1436,27 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1421 | } | 1436 | } |
1422 | } | 1437 | } |
1423 | kvm_mmu_audit(vcpu, "post pte write"); | 1438 | kvm_mmu_audit(vcpu, "post pte write"); |
1439 | mutex_unlock(&vcpu->kvm->lock); | ||
1424 | } | 1440 | } |
1425 | 1441 | ||
1426 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | 1442 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) |
1427 | { | 1443 | { |
1428 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); | 1444 | gpa_t gpa; |
1445 | int r; | ||
1429 | 1446 | ||
1430 | return kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 1447 | down_read(&current->mm->mmap_sem); |
1448 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); | ||
1449 | up_read(&current->mm->mmap_sem); | ||
1450 | |||
1451 | mutex_lock(&vcpu->kvm->lock); | ||
1452 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
1453 | mutex_unlock(&vcpu->kvm->lock); | ||
1454 | return r; | ||
1431 | } | 1455 | } |
1432 | 1456 | ||
1433 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 1457 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
1434 | { | 1458 | { |
1459 | mutex_lock(&vcpu->kvm->lock); | ||
1435 | while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) { | 1460 | while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES) { |
1436 | struct kvm_mmu_page *sp; | 1461 | struct kvm_mmu_page *sp; |
1437 | 1462 | ||
@@ -1440,6 +1465,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | |||
1440 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1465 | kvm_mmu_zap_page(vcpu->kvm, sp); |
1441 | ++vcpu->kvm->stat.mmu_recycled; | 1466 | ++vcpu->kvm->stat.mmu_recycled; |
1442 | } | 1467 | } |
1468 | mutex_unlock(&vcpu->kvm->lock); | ||
1443 | } | 1469 | } |
1444 | 1470 | ||
1445 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | 1471 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) |
@@ -1447,7 +1473,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
1447 | int r; | 1473 | int r; |
1448 | enum emulation_result er; | 1474 | enum emulation_result er; |
1449 | 1475 | ||
1450 | mutex_lock(&vcpu->kvm->lock); | ||
1451 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code); | 1476 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code); |
1452 | if (r < 0) | 1477 | if (r < 0) |
1453 | goto out; | 1478 | goto out; |
@@ -1462,7 +1487,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
1462 | goto out; | 1487 | goto out; |
1463 | 1488 | ||
1464 | er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0); | 1489 | er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0); |
1465 | mutex_unlock(&vcpu->kvm->lock); | ||
1466 | 1490 | ||
1467 | switch (er) { | 1491 | switch (er) { |
1468 | case EMULATE_DONE: | 1492 | case EMULATE_DONE: |
@@ -1477,7 +1501,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
1477 | BUG(); | 1501 | BUG(); |
1478 | } | 1502 | } |
1479 | out: | 1503 | out: |
1480 | mutex_unlock(&vcpu->kvm->lock); | ||
1481 | return r; | 1504 | return r; |
1482 | } | 1505 | } |
1483 | EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); | 1506 | EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); |
@@ -1574,8 +1597,10 @@ void kvm_mmu_zap_all(struct kvm *kvm) | |||
1574 | { | 1597 | { |
1575 | struct kvm_mmu_page *sp, *node; | 1598 | struct kvm_mmu_page *sp, *node; |
1576 | 1599 | ||
1600 | mutex_lock(&kvm->lock); | ||
1577 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 1601 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
1578 | kvm_mmu_zap_page(kvm, sp); | 1602 | kvm_mmu_zap_page(kvm, sp); |
1603 | mutex_unlock(&kvm->lock); | ||
1579 | 1604 | ||
1580 | kvm_flush_remote_tlbs(kvm); | 1605 | kvm_flush_remote_tlbs(kvm); |
1581 | } | 1606 | } |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 56b88f7e83ef..7f83f5557d5e 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -368,11 +368,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
368 | if (r) | 368 | if (r) |
369 | return r; | 369 | return r; |
370 | 370 | ||
371 | down_read(&current->mm->mmap_sem); | ||
371 | /* | 372 | /* |
372 | * Look up the shadow pte for the faulting address. | 373 | * Look up the shadow pte for the faulting address. |
373 | */ | 374 | */ |
374 | r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, | 375 | r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, |
375 | fetch_fault); | 376 | fetch_fault); |
377 | up_read(&current->mm->mmap_sem); | ||
376 | 378 | ||
377 | /* | 379 | /* |
378 | * The page is not mapped by the guest. Let the guest handle it. | 380 | * The page is not mapped by the guest. Let the guest handle it. |
@@ -384,6 +386,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
384 | return 0; | 386 | return 0; |
385 | } | 387 | } |
386 | 388 | ||
389 | mutex_lock(&vcpu->kvm->lock); | ||
387 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 390 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
388 | &write_pt); | 391 | &write_pt); |
389 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, | 392 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, |
@@ -395,11 +398,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
395 | /* | 398 | /* |
396 | * mmio: emulate if accessible, otherwise its a guest fault. | 399 | * mmio: emulate if accessible, otherwise its a guest fault. |
397 | */ | 400 | */ |
398 | if (shadow_pte && is_io_pte(*shadow_pte)) | 401 | if (shadow_pte && is_io_pte(*shadow_pte)) { |
402 | mutex_unlock(&vcpu->kvm->lock); | ||
399 | return 1; | 403 | return 1; |
404 | } | ||
400 | 405 | ||
401 | ++vcpu->stat.pf_fixed; | 406 | ++vcpu->stat.pf_fixed; |
402 | kvm_mmu_audit(vcpu, "post page fault (fixed)"); | 407 | kvm_mmu_audit(vcpu, "post page fault (fixed)"); |
408 | mutex_unlock(&vcpu->kvm->lock); | ||
403 | 409 | ||
404 | return write_pt; | 410 | return write_pt; |
405 | } | 411 | } |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d1167fc303d6..c39493feba46 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -1432,27 +1432,34 @@ static int init_rmode_tss(struct kvm *kvm) | |||
1432 | { | 1432 | { |
1433 | gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | 1433 | gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; |
1434 | u16 data = 0; | 1434 | u16 data = 0; |
1435 | int ret = 0; | ||
1435 | int r; | 1436 | int r; |
1436 | 1437 | ||
1438 | down_read(&current->mm->mmap_sem); | ||
1437 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); | 1439 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); |
1438 | if (r < 0) | 1440 | if (r < 0) |
1439 | return 0; | 1441 | goto out; |
1440 | data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; | 1442 | data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; |
1441 | r = kvm_write_guest_page(kvm, fn++, &data, 0x66, sizeof(u16)); | 1443 | r = kvm_write_guest_page(kvm, fn++, &data, 0x66, sizeof(u16)); |
1442 | if (r < 0) | 1444 | if (r < 0) |
1443 | return 0; | 1445 | goto out; |
1444 | r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); | 1446 | r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); |
1445 | if (r < 0) | 1447 | if (r < 0) |
1446 | return 0; | 1448 | goto out; |
1447 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); | 1449 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); |
1448 | if (r < 0) | 1450 | if (r < 0) |
1449 | return 0; | 1451 | goto out; |
1450 | data = ~0; | 1452 | data = ~0; |
1451 | r = kvm_write_guest_page(kvm, fn, &data, RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, | 1453 | r = kvm_write_guest_page(kvm, fn, &data, |
1452 | sizeof(u8)); | 1454 | RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, |
1455 | sizeof(u8)); | ||
1453 | if (r < 0) | 1456 | if (r < 0) |
1454 | return 0; | 1457 | goto out; |
1455 | return 1; | 1458 | |
1459 | ret = 1; | ||
1460 | out: | ||
1461 | up_read(&current->mm->mmap_sem); | ||
1462 | return ret; | ||
1456 | } | 1463 | } |
1457 | 1464 | ||
1458 | static void seg_setup(int seg) | 1465 | static void seg_setup(int seg) |
@@ -1471,6 +1478,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
1471 | int r = 0; | 1478 | int r = 0; |
1472 | 1479 | ||
1473 | mutex_lock(&kvm->lock); | 1480 | mutex_lock(&kvm->lock); |
1481 | down_write(&current->mm->mmap_sem); | ||
1474 | if (kvm->arch.apic_access_page) | 1482 | if (kvm->arch.apic_access_page) |
1475 | goto out; | 1483 | goto out; |
1476 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; | 1484 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; |
@@ -1482,6 +1490,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
1482 | goto out; | 1490 | goto out; |
1483 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); | 1491 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); |
1484 | out: | 1492 | out: |
1493 | up_write(&current->mm->mmap_sem); | ||
1485 | mutex_unlock(&kvm->lock); | 1494 | mutex_unlock(&kvm->lock); |
1486 | return r; | 1495 | return r; |
1487 | } | 1496 | } |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1f48ec871035..e3b3141db13c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -181,7 +181,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
181 | int ret; | 181 | int ret; |
182 | u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; | 182 | u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; |
183 | 183 | ||
184 | mutex_lock(&vcpu->kvm->lock); | 184 | down_read(&current->mm->mmap_sem); |
185 | ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, | 185 | ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, |
186 | offset * sizeof(u64), sizeof(pdpte)); | 186 | offset * sizeof(u64), sizeof(pdpte)); |
187 | if (ret < 0) { | 187 | if (ret < 0) { |
@@ -198,7 +198,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
198 | 198 | ||
199 | memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); | 199 | memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); |
200 | out: | 200 | out: |
201 | mutex_unlock(&vcpu->kvm->lock); | 201 | up_read(&current->mm->mmap_sem); |
202 | 202 | ||
203 | return ret; | 203 | return ret; |
204 | } | 204 | } |
@@ -212,13 +212,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) | |||
212 | if (is_long_mode(vcpu) || !is_pae(vcpu)) | 212 | if (is_long_mode(vcpu) || !is_pae(vcpu)) |
213 | return false; | 213 | return false; |
214 | 214 | ||
215 | mutex_lock(&vcpu->kvm->lock); | 215 | down_read(&current->mm->mmap_sem); |
216 | r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); | 216 | r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); |
217 | if (r < 0) | 217 | if (r < 0) |
218 | goto out; | 218 | goto out; |
219 | changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; | 219 | changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; |
220 | out: | 220 | out: |
221 | mutex_unlock(&vcpu->kvm->lock); | 221 | up_read(&current->mm->mmap_sem); |
222 | 222 | ||
223 | return changed; | 223 | return changed; |
224 | } | 224 | } |
@@ -278,9 +278,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
278 | kvm_x86_ops->set_cr0(vcpu, cr0); | 278 | kvm_x86_ops->set_cr0(vcpu, cr0); |
279 | vcpu->arch.cr0 = cr0; | 279 | vcpu->arch.cr0 = cr0; |
280 | 280 | ||
281 | mutex_lock(&vcpu->kvm->lock); | ||
282 | kvm_mmu_reset_context(vcpu); | 281 | kvm_mmu_reset_context(vcpu); |
283 | mutex_unlock(&vcpu->kvm->lock); | ||
284 | return; | 282 | return; |
285 | } | 283 | } |
286 | EXPORT_SYMBOL_GPL(set_cr0); | 284 | EXPORT_SYMBOL_GPL(set_cr0); |
@@ -320,9 +318,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
320 | } | 318 | } |
321 | kvm_x86_ops->set_cr4(vcpu, cr4); | 319 | kvm_x86_ops->set_cr4(vcpu, cr4); |
322 | vcpu->arch.cr4 = cr4; | 320 | vcpu->arch.cr4 = cr4; |
323 | mutex_lock(&vcpu->kvm->lock); | ||
324 | kvm_mmu_reset_context(vcpu); | 321 | kvm_mmu_reset_context(vcpu); |
325 | mutex_unlock(&vcpu->kvm->lock); | ||
326 | } | 322 | } |
327 | EXPORT_SYMBOL_GPL(set_cr4); | 323 | EXPORT_SYMBOL_GPL(set_cr4); |
328 | 324 | ||
@@ -360,7 +356,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
360 | */ | 356 | */ |
361 | } | 357 | } |
362 | 358 | ||
363 | mutex_lock(&vcpu->kvm->lock); | 359 | down_read(&current->mm->mmap_sem); |
364 | /* | 360 | /* |
365 | * Does the new cr3 value map to physical memory? (Note, we | 361 | * Does the new cr3 value map to physical memory? (Note, we |
366 | * catch an invalid cr3 even in real-mode, because it would | 362 | * catch an invalid cr3 even in real-mode, because it would |
@@ -376,7 +372,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
376 | vcpu->arch.cr3 = cr3; | 372 | vcpu->arch.cr3 = cr3; |
377 | vcpu->arch.mmu.new_cr3(vcpu); | 373 | vcpu->arch.mmu.new_cr3(vcpu); |
378 | } | 374 | } |
379 | mutex_unlock(&vcpu->kvm->lock); | 375 | up_read(&current->mm->mmap_sem); |
380 | } | 376 | } |
381 | EXPORT_SYMBOL_GPL(set_cr3); | 377 | EXPORT_SYMBOL_GPL(set_cr3); |
382 | 378 | ||
@@ -1211,12 +1207,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | |||
1211 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) | 1207 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) |
1212 | return -EINVAL; | 1208 | return -EINVAL; |
1213 | 1209 | ||
1214 | mutex_lock(&kvm->lock); | 1210 | down_write(&current->mm->mmap_sem); |
1215 | 1211 | ||
1216 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); | 1212 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); |
1217 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; | 1213 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; |
1218 | 1214 | ||
1219 | mutex_unlock(&kvm->lock); | 1215 | up_write(&current->mm->mmap_sem); |
1220 | return 0; | 1216 | return 0; |
1221 | } | 1217 | } |
1222 | 1218 | ||
@@ -1265,7 +1261,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
1265 | < alias->target_phys_addr) | 1261 | < alias->target_phys_addr) |
1266 | goto out; | 1262 | goto out; |
1267 | 1263 | ||
1268 | mutex_lock(&kvm->lock); | 1264 | down_write(&current->mm->mmap_sem); |
1269 | 1265 | ||
1270 | p = &kvm->arch.aliases[alias->slot]; | 1266 | p = &kvm->arch.aliases[alias->slot]; |
1271 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | 1267 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; |
@@ -1279,7 +1275,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
1279 | 1275 | ||
1280 | kvm_mmu_zap_all(kvm); | 1276 | kvm_mmu_zap_all(kvm); |
1281 | 1277 | ||
1282 | mutex_unlock(&kvm->lock); | 1278 | up_write(&current->mm->mmap_sem); |
1283 | 1279 | ||
1284 | return 0; | 1280 | return 0; |
1285 | 1281 | ||
@@ -1355,7 +1351,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
1355 | struct kvm_memory_slot *memslot; | 1351 | struct kvm_memory_slot *memslot; |
1356 | int is_dirty = 0; | 1352 | int is_dirty = 0; |
1357 | 1353 | ||
1358 | mutex_lock(&kvm->lock); | 1354 | down_write(&current->mm->mmap_sem); |
1359 | 1355 | ||
1360 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | 1356 | r = kvm_get_dirty_log(kvm, log, &is_dirty); |
1361 | if (r) | 1357 | if (r) |
@@ -1371,7 +1367,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
1371 | } | 1367 | } |
1372 | r = 0; | 1368 | r = 0; |
1373 | out: | 1369 | out: |
1374 | mutex_unlock(&kvm->lock); | 1370 | up_write(&current->mm->mmap_sem); |
1375 | return r; | 1371 | return r; |
1376 | } | 1372 | } |
1377 | 1373 | ||
@@ -1565,25 +1561,32 @@ int emulator_read_std(unsigned long addr, | |||
1565 | struct kvm_vcpu *vcpu) | 1561 | struct kvm_vcpu *vcpu) |
1566 | { | 1562 | { |
1567 | void *data = val; | 1563 | void *data = val; |
1564 | int r = X86EMUL_CONTINUE; | ||
1568 | 1565 | ||
1566 | down_read(&current->mm->mmap_sem); | ||
1569 | while (bytes) { | 1567 | while (bytes) { |
1570 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 1568 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); |
1571 | unsigned offset = addr & (PAGE_SIZE-1); | 1569 | unsigned offset = addr & (PAGE_SIZE-1); |
1572 | unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); | 1570 | unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); |
1573 | int ret; | 1571 | int ret; |
1574 | 1572 | ||
1575 | if (gpa == UNMAPPED_GVA) | 1573 | if (gpa == UNMAPPED_GVA) { |
1576 | return X86EMUL_PROPAGATE_FAULT; | 1574 | r = X86EMUL_PROPAGATE_FAULT; |
1575 | goto out; | ||
1576 | } | ||
1577 | ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy); | 1577 | ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy); |
1578 | if (ret < 0) | 1578 | if (ret < 0) { |
1579 | return X86EMUL_UNHANDLEABLE; | 1579 | r = X86EMUL_UNHANDLEABLE; |
1580 | goto out; | ||
1581 | } | ||
1580 | 1582 | ||
1581 | bytes -= tocopy; | 1583 | bytes -= tocopy; |
1582 | data += tocopy; | 1584 | data += tocopy; |
1583 | addr += tocopy; | 1585 | addr += tocopy; |
1584 | } | 1586 | } |
1585 | 1587 | out: | |
1586 | return X86EMUL_CONTINUE; | 1588 | up_read(&current->mm->mmap_sem); |
1589 | return r; | ||
1587 | } | 1590 | } |
1588 | EXPORT_SYMBOL_GPL(emulator_read_std); | 1591 | EXPORT_SYMBOL_GPL(emulator_read_std); |
1589 | 1592 | ||
@@ -1601,7 +1604,9 @@ static int emulator_read_emulated(unsigned long addr, | |||
1601 | return X86EMUL_CONTINUE; | 1604 | return X86EMUL_CONTINUE; |
1602 | } | 1605 | } |
1603 | 1606 | ||
1607 | down_read(&current->mm->mmap_sem); | ||
1604 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 1608 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); |
1609 | up_read(&current->mm->mmap_sem); | ||
1605 | 1610 | ||
1606 | /* For APIC access vmexit */ | 1611 | /* For APIC access vmexit */ |
1607 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 1612 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
@@ -1617,11 +1622,14 @@ mmio: | |||
1617 | /* | 1622 | /* |
1618 | * Is this MMIO handled locally? | 1623 | * Is this MMIO handled locally? |
1619 | */ | 1624 | */ |
1625 | mutex_lock(&vcpu->kvm->lock); | ||
1620 | mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); | 1626 | mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); |
1621 | if (mmio_dev) { | 1627 | if (mmio_dev) { |
1622 | kvm_iodevice_read(mmio_dev, gpa, bytes, val); | 1628 | kvm_iodevice_read(mmio_dev, gpa, bytes, val); |
1629 | mutex_unlock(&vcpu->kvm->lock); | ||
1623 | return X86EMUL_CONTINUE; | 1630 | return X86EMUL_CONTINUE; |
1624 | } | 1631 | } |
1632 | mutex_unlock(&vcpu->kvm->lock); | ||
1625 | 1633 | ||
1626 | vcpu->mmio_needed = 1; | 1634 | vcpu->mmio_needed = 1; |
1627 | vcpu->mmio_phys_addr = gpa; | 1635 | vcpu->mmio_phys_addr = gpa; |
@@ -1636,10 +1644,14 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1636 | { | 1644 | { |
1637 | int ret; | 1645 | int ret; |
1638 | 1646 | ||
1647 | down_read(&current->mm->mmap_sem); | ||
1639 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | 1648 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); |
1640 | if (ret < 0) | 1649 | if (ret < 0) { |
1650 | up_read(&current->mm->mmap_sem); | ||
1641 | return 0; | 1651 | return 0; |
1652 | } | ||
1642 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); | 1653 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); |
1654 | up_read(&current->mm->mmap_sem); | ||
1643 | return 1; | 1655 | return 1; |
1644 | } | 1656 | } |
1645 | 1657 | ||
@@ -1649,7 +1661,11 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
1649 | struct kvm_vcpu *vcpu) | 1661 | struct kvm_vcpu *vcpu) |
1650 | { | 1662 | { |
1651 | struct kvm_io_device *mmio_dev; | 1663 | struct kvm_io_device *mmio_dev; |
1652 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 1664 | gpa_t gpa; |
1665 | |||
1666 | down_read(&current->mm->mmap_sem); | ||
1667 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | ||
1668 | up_read(&current->mm->mmap_sem); | ||
1653 | 1669 | ||
1654 | if (gpa == UNMAPPED_GVA) { | 1670 | if (gpa == UNMAPPED_GVA) { |
1655 | kvm_inject_page_fault(vcpu, addr, 2); | 1671 | kvm_inject_page_fault(vcpu, addr, 2); |
@@ -1667,11 +1683,14 @@ mmio: | |||
1667 | /* | 1683 | /* |
1668 | * Is this MMIO handled locally? | 1684 | * Is this MMIO handled locally? |
1669 | */ | 1685 | */ |
1686 | mutex_lock(&vcpu->kvm->lock); | ||
1670 | mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); | 1687 | mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); |
1671 | if (mmio_dev) { | 1688 | if (mmio_dev) { |
1672 | kvm_iodevice_write(mmio_dev, gpa, bytes, val); | 1689 | kvm_iodevice_write(mmio_dev, gpa, bytes, val); |
1690 | mutex_unlock(&vcpu->kvm->lock); | ||
1673 | return X86EMUL_CONTINUE; | 1691 | return X86EMUL_CONTINUE; |
1674 | } | 1692 | } |
1693 | mutex_unlock(&vcpu->kvm->lock); | ||
1675 | 1694 | ||
1676 | vcpu->mmio_needed = 1; | 1695 | vcpu->mmio_needed = 1; |
1677 | vcpu->mmio_phys_addr = gpa; | 1696 | vcpu->mmio_phys_addr = gpa; |
@@ -1718,11 +1737,14 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
1718 | #ifndef CONFIG_X86_64 | 1737 | #ifndef CONFIG_X86_64 |
1719 | /* guests cmpxchg8b have to be emulated atomically */ | 1738 | /* guests cmpxchg8b have to be emulated atomically */ |
1720 | if (bytes == 8) { | 1739 | if (bytes == 8) { |
1721 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 1740 | gpa_t gpa; |
1722 | struct page *page; | 1741 | struct page *page; |
1723 | char *addr; | 1742 | char *addr; |
1724 | u64 val; | 1743 | u64 val; |
1725 | 1744 | ||
1745 | down_read(&current->mm->mmap_sem); | ||
1746 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | ||
1747 | |||
1726 | if (gpa == UNMAPPED_GVA || | 1748 | if (gpa == UNMAPPED_GVA || |
1727 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 1749 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
1728 | goto emul_write; | 1750 | goto emul_write; |
@@ -1736,8 +1758,9 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
1736 | set_64bit((u64 *)(addr + offset_in_page(gpa)), val); | 1758 | set_64bit((u64 *)(addr + offset_in_page(gpa)), val); |
1737 | kunmap_atomic(addr, KM_USER0); | 1759 | kunmap_atomic(addr, KM_USER0); |
1738 | kvm_release_page_dirty(page); | 1760 | kvm_release_page_dirty(page); |
1761 | emul_write: | ||
1762 | up_read(&current->mm->mmap_sem); | ||
1739 | } | 1763 | } |
1740 | emul_write: | ||
1741 | #endif | 1764 | #endif |
1742 | 1765 | ||
1743 | return emulator_write_emulated(addr, new, bytes, vcpu); | 1766 | return emulator_write_emulated(addr, new, bytes, vcpu); |
@@ -2118,10 +2141,10 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
2118 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 2141 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
2119 | 2142 | ||
2120 | for (i = 0; i < nr_pages; ++i) { | 2143 | for (i = 0; i < nr_pages; ++i) { |
2121 | mutex_lock(&vcpu->kvm->lock); | 2144 | down_read(&current->mm->mmap_sem); |
2122 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); | 2145 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); |
2123 | vcpu->arch.pio.guest_pages[i] = page; | 2146 | vcpu->arch.pio.guest_pages[i] = page; |
2124 | mutex_unlock(&vcpu->kvm->lock); | 2147 | up_read(&current->mm->mmap_sem); |
2125 | if (!page) { | 2148 | if (!page) { |
2126 | kvm_inject_gp(vcpu, 0); | 2149 | kvm_inject_gp(vcpu, 0); |
2127 | free_pio_guest_pages(vcpu); | 2150 | free_pio_guest_pages(vcpu); |
@@ -2247,7 +2270,6 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
2247 | char instruction[3]; | 2270 | char instruction[3]; |
2248 | int ret = 0; | 2271 | int ret = 0; |
2249 | 2272 | ||
2250 | mutex_lock(&vcpu->kvm->lock); | ||
2251 | 2273 | ||
2252 | /* | 2274 | /* |
2253 | * Blow out the MMU to ensure that no other VCPU has an active mapping | 2275 | * Blow out the MMU to ensure that no other VCPU has an active mapping |
@@ -2262,8 +2284,6 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
2262 | != X86EMUL_CONTINUE) | 2284 | != X86EMUL_CONTINUE) |
2263 | ret = -EFAULT; | 2285 | ret = -EFAULT; |
2264 | 2286 | ||
2265 | mutex_unlock(&vcpu->kvm->lock); | ||
2266 | |||
2267 | return ret; | 2287 | return ret; |
2268 | } | 2288 | } |
2269 | 2289 | ||
@@ -2447,8 +2467,10 @@ static void vapic_enter(struct kvm_vcpu *vcpu) | |||
2447 | if (!apic || !apic->vapic_addr) | 2467 | if (!apic || !apic->vapic_addr) |
2448 | return; | 2468 | return; |
2449 | 2469 | ||
2470 | down_read(&current->mm->mmap_sem); | ||
2450 | page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); | 2471 | page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); |
2451 | vcpu->arch.apic->vapic_page = page; | 2472 | vcpu->arch.apic->vapic_page = page; |
2473 | up_read(&current->mm->mmap_sem); | ||
2452 | } | 2474 | } |
2453 | 2475 | ||
2454 | static void vapic_exit(struct kvm_vcpu *vcpu) | 2476 | static void vapic_exit(struct kvm_vcpu *vcpu) |
@@ -2910,13 +2932,13 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
2910 | gpa_t gpa; | 2932 | gpa_t gpa; |
2911 | 2933 | ||
2912 | vcpu_load(vcpu); | 2934 | vcpu_load(vcpu); |
2913 | mutex_lock(&vcpu->kvm->lock); | 2935 | down_read(&current->mm->mmap_sem); |
2914 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); | 2936 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); |
2937 | up_read(&current->mm->mmap_sem); | ||
2915 | tr->physical_address = gpa; | 2938 | tr->physical_address = gpa; |
2916 | tr->valid = gpa != UNMAPPED_GVA; | 2939 | tr->valid = gpa != UNMAPPED_GVA; |
2917 | tr->writeable = 1; | 2940 | tr->writeable = 1; |
2918 | tr->usermode = 0; | 2941 | tr->usermode = 0; |
2919 | mutex_unlock(&vcpu->kvm->lock); | ||
2920 | vcpu_put(vcpu); | 2942 | vcpu_put(vcpu); |
2921 | 2943 | ||
2922 | return 0; | 2944 | return 0; |
@@ -3185,13 +3207,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
3185 | */ | 3207 | */ |
3186 | if (!user_alloc) { | 3208 | if (!user_alloc) { |
3187 | if (npages && !old.rmap) { | 3209 | if (npages && !old.rmap) { |
3188 | down_write(&current->mm->mmap_sem); | ||
3189 | memslot->userspace_addr = do_mmap(NULL, 0, | 3210 | memslot->userspace_addr = do_mmap(NULL, 0, |
3190 | npages * PAGE_SIZE, | 3211 | npages * PAGE_SIZE, |
3191 | PROT_READ | PROT_WRITE, | 3212 | PROT_READ | PROT_WRITE, |
3192 | MAP_SHARED | MAP_ANONYMOUS, | 3213 | MAP_SHARED | MAP_ANONYMOUS, |
3193 | 0); | 3214 | 0); |
3194 | up_write(&current->mm->mmap_sem); | ||
3195 | 3215 | ||
3196 | if (IS_ERR((void *)memslot->userspace_addr)) | 3216 | if (IS_ERR((void *)memslot->userspace_addr)) |
3197 | return PTR_ERR((void *)memslot->userspace_addr); | 3217 | return PTR_ERR((void *)memslot->userspace_addr); |
@@ -3199,10 +3219,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
3199 | if (!old.user_alloc && old.rmap) { | 3219 | if (!old.user_alloc && old.rmap) { |
3200 | int ret; | 3220 | int ret; |
3201 | 3221 | ||
3202 | down_write(&current->mm->mmap_sem); | ||
3203 | ret = do_munmap(current->mm, old.userspace_addr, | 3222 | ret = do_munmap(current->mm, old.userspace_addr, |
3204 | old.npages * PAGE_SIZE); | 3223 | old.npages * PAGE_SIZE); |
3205 | up_write(&current->mm->mmap_sem); | ||
3206 | if (ret < 0) | 3224 | if (ret < 0) |
3207 | printk(KERN_WARNING | 3225 | printk(KERN_WARNING |
3208 | "kvm_vm_ioctl_set_memory_region: " | 3226 | "kvm_vm_ioctl_set_memory_region: " |