diff options
author | Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp> | 2011-11-14 04:24:50 -0500 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2011-12-27 04:17:20 -0500 |
commit | 95d4c16ce78cb6b7549a09159c409d52ddd18dae (patch) | |
tree | 1291405b107b4caa495454855baeeea5b9baa5e8 /arch/x86/kvm | |
parent | 7850ac5420803996e2960d15b924021f28e0dffc (diff) |
KVM: Optimize dirty logging by rmap_write_protect()
Currently, write protecting a slot needs to walk all the shadow pages
and check the ones which have a pte mapping a page in it.
The walk is overly heavy when there are not many dirty pages in that slot,
and checking the shadow pages would result in unwanted cache pollution.
To mitigate this problem, we use rmap_write_protect() and check only
the sptes which can be reached from gfns marked in the dirty bitmap
when the number of dirty pages is less than that of shadow pages.
This criterion is reasonable in its meaning and worked well in our test:
write protection became several times faster than before when the ratio of
dirty pages was low and was not worse even when the ratio was near the
criterion.
Note that the locking for this write protection becomes fine grained.
The reason why this is safe is described in the comments.
Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/mmu.c | 14 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 58 |
2 files changed, 61 insertions, 11 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index fa71085f75a3..aecdea265f7e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -1023,15 +1023,13 @@ static void drop_spte(struct kvm *kvm, u64 *sptep) | |||
1023 | rmap_remove(kvm, sptep); | 1023 | rmap_remove(kvm, sptep); |
1024 | } | 1024 | } |
1025 | 1025 | ||
1026 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) | 1026 | int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, |
1027 | struct kvm_memory_slot *slot) | ||
1027 | { | 1028 | { |
1028 | struct kvm_memory_slot *slot; | ||
1029 | unsigned long *rmapp; | 1029 | unsigned long *rmapp; |
1030 | u64 *spte; | 1030 | u64 *spte; |
1031 | int i, write_protected = 0; | 1031 | int i, write_protected = 0; |
1032 | 1032 | ||
1033 | slot = gfn_to_memslot(kvm, gfn); | ||
1034 | |||
1035 | rmapp = __gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL, slot); | 1033 | rmapp = __gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL, slot); |
1036 | spte = rmap_next(kvm, rmapp, NULL); | 1034 | spte = rmap_next(kvm, rmapp, NULL); |
1037 | while (spte) { | 1035 | while (spte) { |
@@ -1066,6 +1064,14 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
1066 | return write_protected; | 1064 | return write_protected; |
1067 | } | 1065 | } |
1068 | 1066 | ||
1067 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) | ||
1068 | { | ||
1069 | struct kvm_memory_slot *slot; | ||
1070 | |||
1071 | slot = gfn_to_memslot(kvm, gfn); | ||
1072 | return kvm_mmu_rmap_write_protect(kvm, gfn, slot); | ||
1073 | } | ||
1074 | |||
1069 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1075 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1070 | unsigned long data) | 1076 | unsigned long data) |
1071 | { | 1077 | { |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 220c83b0fbda..af546b768ffd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -3460,6 +3460,50 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, | |||
3460 | return 0; | 3460 | return 0; |
3461 | } | 3461 | } |
3462 | 3462 | ||
3463 | /** | ||
3464 | * write_protect_slot - write protect a slot for dirty logging | ||
3465 | * @kvm: the kvm instance | ||
3466 | * @memslot: the slot we protect | ||
3467 | * @dirty_bitmap: the bitmap indicating which pages are dirty | ||
3468 | * @nr_dirty_pages: the number of dirty pages | ||
3469 | * | ||
3470 | * We have two ways to find all sptes to protect: | ||
3471 | * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and | ||
3472 | * checks ones that have a spte mapping a page in the slot. | ||
3473 | * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap. | ||
3474 | * | ||
3475 | * Generally speaking, if there are not so many dirty pages compared to the | ||
3476 | * number of shadow pages, we should use the latter. | ||
3477 | * | ||
3478 | * Note that letting others write into a page marked dirty in the old bitmap | ||
3479 | * by using the remaining tlb entry is not a problem. That page will become | ||
3480 | * write protected again when we flush the tlb and then be reported dirty to | ||
3481 | * the user space by copying the old bitmap. | ||
3482 | */ | ||
3483 | static void write_protect_slot(struct kvm *kvm, | ||
3484 | struct kvm_memory_slot *memslot, | ||
3485 | unsigned long *dirty_bitmap, | ||
3486 | unsigned long nr_dirty_pages) | ||
3487 | { | ||
3488 | /* Not many dirty pages compared to # of shadow pages. */ | ||
3489 | if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { | ||
3490 | unsigned long gfn_offset; | ||
3491 | |||
3492 | for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) { | ||
3493 | unsigned long gfn = memslot->base_gfn + gfn_offset; | ||
3494 | |||
3495 | spin_lock(&kvm->mmu_lock); | ||
3496 | kvm_mmu_rmap_write_protect(kvm, gfn, memslot); | ||
3497 | spin_unlock(&kvm->mmu_lock); | ||
3498 | } | ||
3499 | kvm_flush_remote_tlbs(kvm); | ||
3500 | } else { | ||
3501 | spin_lock(&kvm->mmu_lock); | ||
3502 | kvm_mmu_slot_remove_write_access(kvm, memslot->id); | ||
3503 | spin_unlock(&kvm->mmu_lock); | ||
3504 | } | ||
3505 | } | ||
3506 | |||
3463 | /* | 3507 | /* |
3464 | * Get (and clear) the dirty memory log for a memory slot. | 3508 | * Get (and clear) the dirty memory log for a memory slot. |
3465 | */ | 3509 | */ |
@@ -3468,7 +3512,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
3468 | { | 3512 | { |
3469 | int r; | 3513 | int r; |
3470 | struct kvm_memory_slot *memslot; | 3514 | struct kvm_memory_slot *memslot; |
3471 | unsigned long n; | 3515 | unsigned long n, nr_dirty_pages; |
3472 | 3516 | ||
3473 | mutex_lock(&kvm->slots_lock); | 3517 | mutex_lock(&kvm->slots_lock); |
3474 | 3518 | ||
@@ -3482,9 +3526,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
3482 | goto out; | 3526 | goto out; |
3483 | 3527 | ||
3484 | n = kvm_dirty_bitmap_bytes(memslot); | 3528 | n = kvm_dirty_bitmap_bytes(memslot); |
3529 | nr_dirty_pages = memslot->nr_dirty_pages; | ||
3485 | 3530 | ||
3486 | /* If nothing is dirty, don't bother messing with page tables. */ | 3531 | /* If nothing is dirty, don't bother messing with page tables. */ |
3487 | if (memslot->nr_dirty_pages) { | 3532 | if (nr_dirty_pages) { |
3488 | struct kvm_memslots *slots, *old_slots; | 3533 | struct kvm_memslots *slots, *old_slots; |
3489 | unsigned long *dirty_bitmap; | 3534 | unsigned long *dirty_bitmap; |
3490 | 3535 | ||
@@ -3498,8 +3543,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
3498 | if (!slots) | 3543 | if (!slots) |
3499 | goto out; | 3544 | goto out; |
3500 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 3545 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); |
3501 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | 3546 | memslot = &slots->memslots[log->slot]; |
3502 | slots->memslots[log->slot].nr_dirty_pages = 0; | 3547 | memslot->dirty_bitmap = dirty_bitmap; |
3548 | memslot->nr_dirty_pages = 0; | ||
3503 | slots->generation++; | 3549 | slots->generation++; |
3504 | 3550 | ||
3505 | old_slots = kvm->memslots; | 3551 | old_slots = kvm->memslots; |
@@ -3508,9 +3554,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
3508 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; | 3554 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; |
3509 | kfree(old_slots); | 3555 | kfree(old_slots); |
3510 | 3556 | ||
3511 | spin_lock(&kvm->mmu_lock); | 3557 | write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages); |
3512 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | ||
3513 | spin_unlock(&kvm->mmu_lock); | ||
3514 | 3558 | ||
3515 | r = -EFAULT; | 3559 | r = -EFAULT; |
3516 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) | 3560 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) |