author		Takuya Yoshikawa <takuya.yoshikawa@gmail.com>	2012-03-03 00:21:48 -0500
committer	Avi Kivity <avi@redhat.com>			2012-04-08 05:50:00 -0400
commit		60c34612b70711fb14a8dcbc6a79509902450d2e
tree		aaf14cd91200a6b28cb6e720a61e8a4d31cec1d9 /arch/x86
parent		5dc99b2380d59b8aeafa98791f92b96400ed3187
KVM: Switch to srcu-less get_dirty_log()
We have seen some problems with the current implementation of
get_dirty_log(), which uses synchronize_srcu_expedited() for updating
dirty bitmaps; e.g. it is noticeable that this sometimes gives us
ms-order latency when we use VGA displays.
Furthermore, the recent discussion on the following thread
    "srcu: Implement call_srcu()"
    http://lkml.org/lkml/2012/1/31/211
also motivated us to implement get_dirty_log() without SRCU.
This patch achieves this goal without sacrificing performance for
either VGA or live migration: in practice the new code is much faster
than the old one unless we have too many dirty pages.
Implementation:
The key part of the implementation is the use of the xchg() operation
for clearing dirty bits atomically. Since this allows us to update only
BITS_PER_LONG pages at once, we need to iterate over the dirty bitmap
until every dirty bit is cleared again for the next call.
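
(A minimal standalone sketch of that snapshot-and-clear step, not part
of the patch: C11 atomics stand in for the kernel's xchg(), and the
helper name snapshot_and_clear() is hypothetical.)

	#include <stdatomic.h>
	#include <stddef.h>

	/* Atomically move dirty bits from 'bitmap' into 'snapshot',
	 * one word (BITS_PER_LONG pages) at a time.  Bits set by
	 * concurrent writers are never lost: each bit either lands in
	 * this snapshot or stays set for the next call. */
	static void snapshot_and_clear(_Atomic unsigned long *bitmap,
				       unsigned long *snapshot,
				       size_t nwords)
	{
		for (size_t i = 0; i < nwords; i++) {
			if (!atomic_load(&bitmap[i]))
				continue;
			/* the xchg() equivalent: read and zero in one step */
			snapshot[i] = atomic_exchange(&bitmap[i], 0);
		}
	}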
Although some people may worry about applying an atomic memory
instruction many times to a concurrently accessible bitmap, the bitmap
is usually accessed with mmu_lock held and we rarely see concurrent
accesses: so what we need to care about is the pure xchg() overhead.
Another point to note is that we do not use for_each_set_bit() to check
which of the BITS_PER_LONG pages covered by each word are actually
dirty. Instead we simply use __ffs() in a loop. This is much faster
than repeatedly calling find_next_bit().
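
(For a single bitmap word, that pattern looks like the sketch below;
illustrative only: __builtin_ctzl() stands in for the kernel's __ffs(),
and process_page() is a hypothetical callback.)

	/* Visit every set bit in 'mask', lowest first; 'base' is the
	 * page frame number covered by bit 0 of this word. */
	static void for_each_dirty_page(unsigned long mask,
					unsigned long base,
					void (*process_page)(unsigned long))
	{
		while (mask) {
			unsigned long bit = __builtin_ctzl(mask);

			process_page(base + bit);
			mask &= mask - 1;	/* clear the lowest set bit */
		}
	}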
Performance:
The dirty-log-perf unit test showed nice improvements, running several
times faster than before except in some extreme cases; for those cases,
getting the dirty page information is already much faster than
userspace can process it.
For real workloads, both VGA and live migration, we have observed clear
improvements: when the guest was reading a file during live migration,
we originally saw a few ms of latency, but with the new method the
latency was less than 200us.
Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/kvm/x86.c	116
1 file changed, 43 insertions(+), 73 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 813ebf1e55a0..0d9a57875f0b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3067,55 +3067,32 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 }
 
 /**
- * write_protect_slot - write protect a slot for dirty logging
- * @kvm: the kvm instance
- * @memslot: the slot we protect
- * @dirty_bitmap: the bitmap indicating which pages are dirty
- * @nr_dirty_pages: the number of dirty pages
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
  *
- * We have two ways to find all sptes to protect:
- * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and
- *    checks ones that have a spte mapping a page in the slot.
- * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap.
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently.  So, to avoid losing data, we keep the following order for
+ * each bit:
  *
- * Generally speaking, if there are not so many dirty pages compared to the
- * number of shadow pages, we should use the latter.
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Flush TLB's if needed.
+ *   4. Copy the snapshot to the userspace.
  *
- * Note that letting others write into a page marked dirty in the old bitmap
- * by using the remaining tlb entry is not a problem.  That page will become
- * write protected again when we flush the tlb and then be reported dirty to
- * the user space by copying the old bitmap.
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry.  This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
  */
-static void write_protect_slot(struct kvm *kvm,
-			       struct kvm_memory_slot *memslot,
-			       unsigned long *dirty_bitmap,
-			       unsigned long nr_dirty_pages)
-{
-	spin_lock(&kvm->mmu_lock);
-
-	/* Not many dirty pages compared to # of shadow pages. */
-	if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
-		gfn_t offset;
-
-		for_each_set_bit(offset, dirty_bitmap, memslot->npages)
-			kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, 1);
-
-		kvm_flush_remote_tlbs(kvm);
-	} else
-		kvm_mmu_slot_remove_write_access(kvm, memslot->id);
-
-	spin_unlock(&kvm->mmu_lock);
-}
-
-/*
- * Get (and clear) the dirty memory log for a memory slot.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-				      struct kvm_dirty_log *log)
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 {
 	int r;
 	struct kvm_memory_slot *memslot;
-	unsigned long n, nr_dirty_pages;
+	unsigned long n, i;
+	unsigned long *dirty_bitmap;
+	unsigned long *dirty_bitmap_buffer;
+	bool is_dirty = false;
 
 	mutex_lock(&kvm->slots_lock);
 
@@ -3124,49 +3101,42 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		goto out;
 
 	memslot = id_to_memslot(kvm->memslots, log->slot);
+
+	dirty_bitmap = memslot->dirty_bitmap;
 	r = -ENOENT;
-	if (!memslot->dirty_bitmap)
+	if (!dirty_bitmap)
 		goto out;
 
 	n = kvm_dirty_bitmap_bytes(memslot);
-	nr_dirty_pages = memslot->nr_dirty_pages;
 
-	/* If nothing is dirty, don't bother messing with page tables. */
-	if (nr_dirty_pages) {
-		struct kvm_memslots *slots, *old_slots;
-		unsigned long *dirty_bitmap, *dirty_bitmap_head;
+	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+	memset(dirty_bitmap_buffer, 0, n);
 
-		dirty_bitmap = memslot->dirty_bitmap;
-		dirty_bitmap_head = memslot->dirty_bitmap_head;
-		if (dirty_bitmap == dirty_bitmap_head)
-			dirty_bitmap_head += n / sizeof(long);
-		memset(dirty_bitmap_head, 0, n);
+	spin_lock(&kvm->mmu_lock);
 
-		r = -ENOMEM;
-		slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL);
-		if (!slots)
-			goto out;
+	for (i = 0; i < n / sizeof(long); i++) {
+		unsigned long mask;
+		gfn_t offset;
 
-		memslot = id_to_memslot(slots, log->slot);
-		memslot->nr_dirty_pages = 0;
-		memslot->dirty_bitmap = dirty_bitmap_head;
-		update_memslots(slots, NULL);
+		if (!dirty_bitmap[i])
+			continue;
 
-		old_slots = kvm->memslots;
-		rcu_assign_pointer(kvm->memslots, slots);
-		synchronize_srcu_expedited(&kvm->srcu);
-		kfree(old_slots);
+		is_dirty = true;
 
-		write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages);
+		mask = xchg(&dirty_bitmap[i], 0);
+		dirty_bitmap_buffer[i] = mask;
 
-		r = -EFAULT;
-		if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
-			goto out;
-	} else {
-		r = -EFAULT;
-		if (clear_user(log->dirty_bitmap, n))
-			goto out;
+		offset = i * BITS_PER_LONG;
+		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
 	}
+	if (is_dirty)
+		kvm_flush_remote_tlbs(kvm);
+
+	spin_unlock(&kvm->mmu_lock);
+
+	r = -EFAULT;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+		goto out;
 
 	r = 0;
 out:
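
(For context, userspace consumes the bitmap that this function fills via
the KVM_GET_DIRTY_LOG ioctl.  A minimal sketch, not part of the patch:
error handling and slot bookkeeping are elided, get_dirty_log_once() is
a hypothetical helper, and 'npages' is assumed to match the memslot that
was registered earlier.)

	#include <linux/kvm.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>

	/* Fetch and clear the dirty log for one memory slot of 'vm_fd'
	 * (the KVM VM file descriptor).  Returns a heap-allocated
	 * bitmap, one bit per page, which the caller must free. */
	static unsigned long *get_dirty_log_once(int vm_fd, int slot,
						 size_t npages)
	{
		size_t n = ((npages + 63) / 64) * 8;	/* bitmap bytes */
		unsigned long *bitmap = calloc(1, n);
		struct kvm_dirty_log log = {
			.slot = slot,
			.dirty_bitmap = bitmap,
		};

		if (!bitmap || ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
			free(bitmap);
			return NULL;
		}
		return bitmap;
	}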