-rw-r--r--	arch/x86/kvm/x86.c	116
1 file changed, 43 insertions(+), 73 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 813ebf1e55a0..0d9a57875f0b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3067,55 +3067,32 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 }
 
 /**
- * write_protect_slot - write protect a slot for dirty logging
- * @kvm: the kvm instance
- * @memslot: the slot we protect
- * @dirty_bitmap: the bitmap indicating which pages are dirty
- * @nr_dirty_pages: the number of dirty pages
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
  *
- * We have two ways to find all sptes to protect:
- * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and
- *    checks ones that have a spte mapping a page in the slot.
- * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap.
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently. So, to avoid losing data, we keep the following order for
+ * each bit:
  *
- * Generally speaking, if there are not so many dirty pages compared to the
- * number of shadow pages, we should use the latter.
+ *   1. Take a snapshot of the bit and clear it if needed.
+ *   2. Write protect the corresponding page.
+ *   3. Flush TLB's if needed.
+ *   4. Copy the snapshot to the userspace.
  *
- * Note that letting others write into a page marked dirty in the old bitmap
- * by using the remaining tlb entry is not a problem. That page will become
- * write protected again when we flush the tlb and then be reported dirty to
- * the user space by copying the old bitmap.
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry. This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
  */
-static void write_protect_slot(struct kvm *kvm,
-                               struct kvm_memory_slot *memslot,
-                               unsigned long *dirty_bitmap,
-                               unsigned long nr_dirty_pages)
-{
-        spin_lock(&kvm->mmu_lock);
-
-        /* Not many dirty pages compared to # of shadow pages. */
-        if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
-                gfn_t offset;
-
-                for_each_set_bit(offset, dirty_bitmap, memslot->npages)
-                        kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, 1);
-
-                kvm_flush_remote_tlbs(kvm);
-        } else
-                kvm_mmu_slot_remove_write_access(kvm, memslot->id);
-
-        spin_unlock(&kvm->mmu_lock);
-}
-
-/*
- * Get (and clear) the dirty memory log for a memory slot.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-                               struct kvm_dirty_log *log)
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 {
         int r;
         struct kvm_memory_slot *memslot;
-        unsigned long n, nr_dirty_pages;
+        unsigned long n, i;
+        unsigned long *dirty_bitmap;
+        unsigned long *dirty_bitmap_buffer;
+        bool is_dirty = false;
 
         mutex_lock(&kvm->slots_lock);
 
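The kernel-doc comment added above is the core of this change: each dirty bit is snapshotted and cleared atomically, the corresponding page is write-protected, TLBs are flushed, and only then is the snapshot copied to userspace. As a rough illustration of why the snapshot step cannot lose bits set by concurrently running VCPU threads, here is a stand-alone user-space C model; the thread, array sizes, and names below are made up for the sketch and are not KVM code:

/* Toy model of the "snapshot and clear, then report" ordering; not KVM code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define WORDS 4
#define BITS_PER_WORD (8 * sizeof(unsigned long))

static _Atomic unsigned long dirty[WORDS];   /* written concurrently ("vCPU") */
static unsigned long snapshot[WORDS];        /* what would be copied to userspace */

static void *vcpu_thread(void *arg)
{
        /* Simulate guest writes: set dirty bits while the log is being read. */
        for (unsigned long gfn = 0; gfn < WORDS * BITS_PER_WORD; gfn++)
                atomic_fetch_or(&dirty[gfn / BITS_PER_WORD],
                                1UL << (gfn % BITS_PER_WORD));
        return NULL;
}

int main(void)
{
        pthread_t t;
        pthread_create(&t, NULL, vcpu_thread, NULL);

        /*
         * Step 1 of the comment: take the snapshot and clear the word in one
         * atomic operation.  A bit set by the other thread is seen either in
         * this snapshot or remains set for the next call -- it is never lost.
         */
        for (int i = 0; i < WORDS; i++)
                snapshot[i] = atomic_exchange(&dirty[i], 0);

        /* Steps 2 and 3 (write protection, TLB flush) have no analogue here. */

        pthread_join(t, NULL);

        /* Step 4: report the snapshot; late writes are still pending in dirty[]. */
        for (int i = 0; i < WORDS; i++)
                printf("word %d: snapshot=%lx pending=%lx\n",
                       i, snapshot[i], atomic_load(&dirty[i]));
        return 0;
}

The only property the sketch demonstrates is the one the comment relies on: because the exchange is atomic, a bit set by the writer thread shows up either in this snapshot or stays pending in the live bitmap for the next call; it is never dropped.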
@@ -3124,49 +3101,42 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                 goto out;
 
         memslot = id_to_memslot(kvm->memslots, log->slot);
+
+        dirty_bitmap = memslot->dirty_bitmap;
         r = -ENOENT;
-        if (!memslot->dirty_bitmap)
+        if (!dirty_bitmap)
                 goto out;
 
         n = kvm_dirty_bitmap_bytes(memslot);
-        nr_dirty_pages = memslot->nr_dirty_pages;
 
-        /* If nothing is dirty, don't bother messing with page tables. */
-        if (nr_dirty_pages) {
-                struct kvm_memslots *slots, *old_slots;
-                unsigned long *dirty_bitmap, *dirty_bitmap_head;
+        dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+        memset(dirty_bitmap_buffer, 0, n);
 
-                dirty_bitmap = memslot->dirty_bitmap;
-                dirty_bitmap_head = memslot->dirty_bitmap_head;
-                if (dirty_bitmap == dirty_bitmap_head)
-                        dirty_bitmap_head += n / sizeof(long);
-                memset(dirty_bitmap_head, 0, n);
+        spin_lock(&kvm->mmu_lock);
 
-                r = -ENOMEM;
-                slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL);
-                if (!slots)
-                        goto out;
+        for (i = 0; i < n / sizeof(long); i++) {
+                unsigned long mask;
+                gfn_t offset;
 
-                memslot = id_to_memslot(slots, log->slot);
-                memslot->nr_dirty_pages = 0;
-                memslot->dirty_bitmap = dirty_bitmap_head;
-                update_memslots(slots, NULL);
+                if (!dirty_bitmap[i])
+                        continue;
 
-                old_slots = kvm->memslots;
-                rcu_assign_pointer(kvm->memslots, slots);
-                synchronize_srcu_expedited(&kvm->srcu);
-                kfree(old_slots);
+                is_dirty = true;
 
-                write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages);
+                mask = xchg(&dirty_bitmap[i], 0);
+                dirty_bitmap_buffer[i] = mask;
 
-                r = -EFAULT;
-                if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
-                        goto out;
-        } else {
-                r = -EFAULT;
-                if (clear_user(log->dirty_bitmap, n))
-                        goto out;
+                offset = i * BITS_PER_LONG;
+                kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
         }
+        if (is_dirty)
+                kvm_flush_remote_tlbs(kvm);
+
+        spin_unlock(&kvm->mmu_lock);
+
+        r = -EFAULT;
+        if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+                goto out;
 
         r = 0;
 out:
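The new code reuses the tail of the slot's dirty bitmap as its copy-out buffer: dirty_bitmap_buffer points n / sizeof(long) words past dirty_bitmap, which presumes the bitmap allocation is twice kvm_dirty_bitmap_bytes() in size. A toy sketch of that assumed layout (the allocation, sizes, and printout are illustrative only, not the real kvm_memory_slot):

/* Toy sketch of the assumed bitmap layout; allocation and sizes are stand-ins. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

int main(void)
{
        unsigned long npages = 512;                    /* pretend slot size */
        /* what kvm_dirty_bitmap_bytes() would return for this slot */
        size_t n = (npages + BITS_PER_LONG - 1) / BITS_PER_LONG * sizeof(unsigned long);

        /* one zeroed allocation of 2 * n bytes: [ live bitmap | scratch buffer ] */
        unsigned long *dirty_bitmap = calloc(2, n);
        if (!dirty_bitmap)
                return 1;
        unsigned long *dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);

        memset(dirty_bitmap_buffer, 0, n);             /* same step as the patch */

        printf("%zu bitmap words, buffer starts at word %td\n",
               n / sizeof(long), dirty_bitmap_buffer - dirty_bitmap);

        free(dirty_bitmap);
        return 0;
}

Keeping the scratch buffer inside the existing allocation is what lets the ioctl drop the old path's kmemdup() of the memslots array and the synchronize_srcu_expedited() grace period: everything now happens under mmu_lock, with no memory allocation in the middle.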