path: root/arch/x86
author	Takuya Yoshikawa <takuya.yoshikawa@gmail.com>	2012-03-03 00:21:48 -0500
committer	Avi Kivity <avi@redhat.com>	2012-04-08 05:50:00 -0400
commit	60c34612b70711fb14a8dcbc6a79509902450d2e
tree	aaf14cd91200a6b28cb6e720a61e8a4d31cec1d9 /arch/x86
parent	5dc99b2380d59b8aeafa98791f92b96400ed3187
KVM: Switch to srcu-less get_dirty_log()
We have seen some problems with the current implementation of get_dirty_log(),
which uses synchronize_srcu_expedited() for updating dirty bitmaps; e.g. it
sometimes gives us millisecond-order latency when we use VGA displays.

Furthermore, the recent discussion on the following thread,
    "srcu: Implement call_srcu()"
    http://lkml.org/lkml/2012/1/31/211
also motivated us to implement get_dirty_log() without SRCU.

This patch achieves this goal without sacrificing the performance of either
VGA or live migration: in practice the new code is much faster than the old
one unless we have too many dirty pages.

Implementation:

The key part of the implementation is the use of the xchg() operation for
clearing dirty bits atomically.  Since this allows us to update only
BITS_PER_LONG pages at once, we need to iterate over the dirty bitmap until
every dirty bit is cleared again for the next call.

Although some people may worry about issuing atomic memory instructions many
times on a concurrently accessible bitmap, it is usually accessed with
mmu_lock held and we rarely see concurrent accesses: so what we need to care
about is the pure xchg() overhead.

Another point to note is that we do not use for_each_set_bit() to check which
pages in each BITS_PER_LONG-page block are actually dirty.  Instead we simply
use __ffs() in a loop.  This is much faster than repeatedly calling
find_next_bit().

Performance:

The dirty-log-perf unit test showed nice improvements, several times faster
than before in some cases, except for some extreme cases; for such cases the
dirty page information can be obtained faster than userspace can process it.

For real workloads, both VGA and live migration, we have observed pure
improvements: when the guest was reading a file during live migration, we
originally saw a few ms of latency, but with the new method the latency was
less than 200us.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Avi Kivity <avi@redhat.com>
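To illustrate the bit-walking pattern the message refers to, here is a
minimal, self-contained userspace sketch (not the kernel code itself): it
walks the set bits of one bitmap word that would have been snapshotted with
xchg(), using the GCC/Clang __builtin_ctzl() builtin in place of the kernel's
__ffs().  The handle_dirty_page() helper and all names are hypothetical.

	/*
	 * Hypothetical illustration, not kernel code: walk the set bits of
	 * one snapshotted dirty-bitmap word with a count-trailing-zeros step
	 * instead of repeated find_next_bit() calls.
	 */
	#include <stdio.h>

	#define BITS_PER_LONG	(8 * (unsigned long)sizeof(unsigned long))

	/* Stand-in for the real per-page work (write protecting one gfn). */
	static void handle_dirty_page(unsigned long gfn)
	{
		printf("dirty gfn: %lu\n", gfn);
	}

	static void process_dirty_word(unsigned long base_gfn, unsigned long mask)
	{
		while (mask) {
			unsigned long bit = __builtin_ctzl(mask);

			handle_dirty_page(base_gfn + bit);
			mask &= mask - 1;	/* clear the bit just handled */
		}
	}

	int main(void)
	{
		/* Pretend word 2 of the bitmap was fetched and cleared via xchg(). */
		process_dirty_word(2 * BITS_PER_LONG, 0x00000005UL);
		return 0;
	}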
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/kvm/x86.c	116
1 file changed, 43 insertions(+), 73 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 813ebf1e55a0..0d9a57875f0b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3067,55 +3067,32 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 }
 
 /**
- * write_protect_slot - write protect a slot for dirty logging
- * @kvm: the kvm instance
- * @memslot: the slot we protect
- * @dirty_bitmap: the bitmap indicating which pages are dirty
- * @nr_dirty_pages: the number of dirty pages
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
  *
- * We have two ways to find all sptes to protect:
- * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and
- *    checks ones that have a spte mapping a page in the slot.
- * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap.
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently.  So, to avoid losing data, we keep the following order for
+ * each bit:
  *
- * Generally speaking, if there are not so many dirty pages compared to the
- * number of shadow pages, we should use the latter.
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Flush TLB's if needed.
+ *   4. Copy the snapshot to the userspace.
  *
- * Note that letting others write into a page marked dirty in the old bitmap
- * by using the remaining tlb entry is not a problem. That page will become
- * write protected again when we flush the tlb and then be reported dirty to
- * the user space by copying the old bitmap.
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry.  This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
  */
-static void write_protect_slot(struct kvm *kvm,
-			       struct kvm_memory_slot *memslot,
-			       unsigned long *dirty_bitmap,
-			       unsigned long nr_dirty_pages)
-{
-	spin_lock(&kvm->mmu_lock);
-
-	/* Not many dirty pages compared to # of shadow pages. */
-	if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
-		gfn_t offset;
-
-		for_each_set_bit(offset, dirty_bitmap, memslot->npages)
-			kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, 1);
-
-		kvm_flush_remote_tlbs(kvm);
-	} else
-		kvm_mmu_slot_remove_write_access(kvm, memslot->id);
-
-	spin_unlock(&kvm->mmu_lock);
-}
-
-/*
- * Get (and clear) the dirty memory log for a memory slot.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-			       struct kvm_dirty_log *log)
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 {
 	int r;
 	struct kvm_memory_slot *memslot;
-	unsigned long n, nr_dirty_pages;
+	unsigned long n, i;
+	unsigned long *dirty_bitmap;
+	unsigned long *dirty_bitmap_buffer;
+	bool is_dirty = false;
 
 	mutex_lock(&kvm->slots_lock);
 
@@ -3124,49 +3101,42 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		goto out;
 
 	memslot = id_to_memslot(kvm->memslots, log->slot);
+
+	dirty_bitmap = memslot->dirty_bitmap;
 	r = -ENOENT;
-	if (!memslot->dirty_bitmap)
+	if (!dirty_bitmap)
 		goto out;
 
 	n = kvm_dirty_bitmap_bytes(memslot);
-	nr_dirty_pages = memslot->nr_dirty_pages;
 
-	/* If nothing is dirty, don't bother messing with page tables. */
-	if (nr_dirty_pages) {
-		struct kvm_memslots *slots, *old_slots;
-		unsigned long *dirty_bitmap, *dirty_bitmap_head;
+	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+	memset(dirty_bitmap_buffer, 0, n);
 
-		dirty_bitmap = memslot->dirty_bitmap;
-		dirty_bitmap_head = memslot->dirty_bitmap_head;
-		if (dirty_bitmap == dirty_bitmap_head)
-			dirty_bitmap_head += n / sizeof(long);
-		memset(dirty_bitmap_head, 0, n);
+	spin_lock(&kvm->mmu_lock);
 
-		r = -ENOMEM;
-		slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL);
-		if (!slots)
-			goto out;
+	for (i = 0; i < n / sizeof(long); i++) {
+		unsigned long mask;
+		gfn_t offset;
 
-		memslot = id_to_memslot(slots, log->slot);
-		memslot->nr_dirty_pages = 0;
-		memslot->dirty_bitmap = dirty_bitmap_head;
-		update_memslots(slots, NULL);
+		if (!dirty_bitmap[i])
+			continue;
 
-		old_slots = kvm->memslots;
-		rcu_assign_pointer(kvm->memslots, slots);
-		synchronize_srcu_expedited(&kvm->srcu);
-		kfree(old_slots);
+		is_dirty = true;
 
-		write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages);
+		mask = xchg(&dirty_bitmap[i], 0);
+		dirty_bitmap_buffer[i] = mask;
 
-		r = -EFAULT;
-		if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
-			goto out;
-	} else {
-		r = -EFAULT;
-		if (clear_user(log->dirty_bitmap, n))
-			goto out;
-	}
+		offset = i * BITS_PER_LONG;
+		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
+	}
+	if (is_dirty)
+		kvm_flush_remote_tlbs(kvm);
+
+	spin_unlock(&kvm->mmu_lock);
+
+	r = -EFAULT;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+		goto out;
 
 	r = 0;
 out: