-rw-r--r--  arch/x86/kvm/x86.c  116
1 files changed, 43 insertions, 73 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 813ebf1e55a0..0d9a57875f0b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3067,55 +3067,32 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 }
 
 /**
- * write_protect_slot - write protect a slot for dirty logging
- * @kvm: the kvm instance
- * @memslot: the slot we protect
- * @dirty_bitmap: the bitmap indicating which pages are dirty
- * @nr_dirty_pages: the number of dirty pages
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
  *
- * We have two ways to find all sptes to protect:
- * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and
- *    checks ones that have a spte mapping a page in the slot.
- * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap.
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently. So, to avoid losing data, we keep the following order for
+ * each bit:
  *
- * Generally speaking, if there are not so many dirty pages compared to the
- * number of shadow pages, we should use the latter.
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Flush TLB's if needed.
+ *   4. Copy the snapshot to the userspace.
  *
- * Note that letting others write into a page marked dirty in the old bitmap
- * by using the remaining tlb entry is not a problem. That page will become
- * write protected again when we flush the tlb and then be reported dirty to
- * the user space by copying the old bitmap.
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry. This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
  */
-static void write_protect_slot(struct kvm *kvm,
-			       struct kvm_memory_slot *memslot,
-			       unsigned long *dirty_bitmap,
-			       unsigned long nr_dirty_pages)
-{
-	spin_lock(&kvm->mmu_lock);
-
-	/* Not many dirty pages compared to # of shadow pages. */
-	if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
-		gfn_t offset;
-
-		for_each_set_bit(offset, dirty_bitmap, memslot->npages)
-			kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, 1);
-
-		kvm_flush_remote_tlbs(kvm);
-	} else
-		kvm_mmu_slot_remove_write_access(kvm, memslot->id);
-
-	spin_unlock(&kvm->mmu_lock);
-}
-
-/*
- * Get (and clear) the dirty memory log for a memory slot.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-			       struct kvm_dirty_log *log)
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 {
 	int r;
 	struct kvm_memory_slot *memslot;
-	unsigned long n, nr_dirty_pages;
+	unsigned long n, i;
+	unsigned long *dirty_bitmap;
+	unsigned long *dirty_bitmap_buffer;
+	bool is_dirty = false;
 
 	mutex_lock(&kvm->slots_lock);
 
@@ -3124,49 +3101,42 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		goto out;
 
 	memslot = id_to_memslot(kvm->memslots, log->slot);
+
+	dirty_bitmap = memslot->dirty_bitmap;
 	r = -ENOENT;
-	if (!memslot->dirty_bitmap)
+	if (!dirty_bitmap)
 		goto out;
 
 	n = kvm_dirty_bitmap_bytes(memslot);
-	nr_dirty_pages = memslot->nr_dirty_pages;
 
-	/* If nothing is dirty, don't bother messing with page tables. */
-	if (nr_dirty_pages) {
-		struct kvm_memslots *slots, *old_slots;
-		unsigned long *dirty_bitmap, *dirty_bitmap_head;
+	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+	memset(dirty_bitmap_buffer, 0, n);
 
-		dirty_bitmap = memslot->dirty_bitmap;
-		dirty_bitmap_head = memslot->dirty_bitmap_head;
-		if (dirty_bitmap == dirty_bitmap_head)
-			dirty_bitmap_head += n / sizeof(long);
-		memset(dirty_bitmap_head, 0, n);
+	spin_lock(&kvm->mmu_lock);
 
-		r = -ENOMEM;
-		slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL);
-		if (!slots)
-			goto out;
+	for (i = 0; i < n / sizeof(long); i++) {
+		unsigned long mask;
+		gfn_t offset;
 
-		memslot = id_to_memslot(slots, log->slot);
-		memslot->nr_dirty_pages = 0;
-		memslot->dirty_bitmap = dirty_bitmap_head;
-		update_memslots(slots, NULL);
+		if (!dirty_bitmap[i])
+			continue;
 
-		old_slots = kvm->memslots;
-		rcu_assign_pointer(kvm->memslots, slots);
-		synchronize_srcu_expedited(&kvm->srcu);
-		kfree(old_slots);
+		is_dirty = true;
 
-		write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages);
+		mask = xchg(&dirty_bitmap[i], 0);
+		dirty_bitmap_buffer[i] = mask;
 
-		r = -EFAULT;
-		if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
-			goto out;
-	} else {
-		r = -EFAULT;
-		if (clear_user(log->dirty_bitmap, n))
-			goto out;
+		offset = i * BITS_PER_LONG;
+		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
 	}
+	if (is_dirty)
+		kvm_flush_remote_tlbs(kvm);
+
+	spin_unlock(&kvm->mmu_lock);
+
+	r = -EFAULT;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+		goto out;
 
 	r = 0;
 out:
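
Not part of the commit: a minimal, standalone C11 sketch of the xchg-based
"snapshot and clear" idiom that the new kvm_vm_ioctl_get_dirty_log() relies on
(steps 1-4 in the comment above). The names dirty_bitmap, snapshot_dirty_words
and DIRTY_WORDS are made up for this illustration, and atomic_exchange()
stands in for the kernel's xchg().

/* Illustrative sketch only -- not KVM code. */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

#define DIRTY_WORDS 4	/* hypothetical bitmap size, in longs */

/* Writers (the "VCPU threads" in the commit) set bits here concurrently. */
static _Atomic unsigned long dirty_bitmap[DIRTY_WORDS];

/*
 * Step 1 of the ordering described above: atomically snapshot each word
 * and clear it, so bits set by concurrent writers after the exchange are
 * preserved for the next call instead of being lost.
 */
static void snapshot_dirty_words(unsigned long *snapshot)
{
	for (size_t i = 0; i < DIRTY_WORDS; i++) {
		if (!atomic_load(&dirty_bitmap[i])) {
			snapshot[i] = 0;
			continue;
		}
		/* Equivalent of: mask = xchg(&dirty_bitmap[i], 0); */
		snapshot[i] = atomic_exchange(&dirty_bitmap[i], 0);
		/*
		 * In KVM, the pages covered by snapshot[i] would be write
		 * protected here (step 2); the TLB flush (step 3) and the
		 * copy_to_user() of the snapshot (step 4) follow the loop.
		 */
	}
}

int main(void)
{
	unsigned long snapshot[DIRTY_WORDS];

	atomic_store(&dirty_bitmap[1], 0x5UL);	/* pretend two pages were dirtied */
	snapshot_dirty_words(snapshot);

	printf("snapshot[1] = %#lx, bitmap[1] = %#lx\n",
	       snapshot[1], (unsigned long)atomic_load(&dirty_bitmap[1]));
	return 0;
}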