diff options
author | Andrea Arcangeli <andrea@qumranet.com> | 2008-07-25 10:24:52 -0400 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2008-07-29 05:33:53 -0400 |
commit | e930bffe95e1e886a1ede80726ea38df5838d067 (patch) | |
tree | d39227c3de8e7d4a70737c78693f6d7f458066af /virt/kvm/kvm_main.c | |
parent | 604b38ac0369bd50fcbb33344aa5553c071009f7 (diff) |
KVM: Synchronize guest physical memory map to host virtual memory map
Synchronize changes to host virtual addresses which are part of
a KVM memory slot to the KVM shadow mmu. This allows pte operations
like swapping, page migration, and madvise() to transparently work
with KVM.
Signed-off-by: Andrea Arcangeli <andrea@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 135 |
1 files changed, 135 insertions, 0 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3735212cd3f8..7dd9b0b85e4e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -192,6 +192,123 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
192 | } | 192 | } |
193 | EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); | 193 | EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); |
194 | 194 | ||
195 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | ||
/* Return the struct kvm that embeds *mn as its mmu_notifier member (via container_of). */
196 | static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) | ||
197 | { | ||
198 | return container_of(mn, struct kvm, mmu_notifier); | ||
199 | } | ||
200 | |||
/*
 * mmu notifier ->invalidate_page callback: under kvm->mmu_lock, bump
 * kvm->mmu_notifier_seq and call kvm_unmap_hva() for the single host virtual
 * address; if kvm_unmap_hva() returned nonzero, flush remote TLBs after the
 * lock is dropped. The mm argument is not used in this function.
 */
201 | static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | ||
202 | struct mm_struct *mm, | ||
203 | unsigned long address) | ||
204 | { | ||
205 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | ||
206 | int need_tlb_flush; | ||
207 | |||
208 | /* | ||
209 | * When ->invalidate_page runs, the linux pte has been zapped | ||
210 | * already but the page is still allocated until | ||
211 | * ->invalidate_page returns. So if we increase the sequence | ||
212 | * here the kvm page fault will notice if the spte can't be | ||
213 | * established because the page is going to be freed. If | ||
214 | * instead the kvm page fault establishes the spte before | ||
215 | * ->invalidate_page runs, kvm_unmap_hva will release it | ||
216 | * before returning. | ||
217 | * | ||
218 | * The sequence increase only needs to be seen at spin_unlock | ||
219 | * time, and not at spin_lock time. | ||
220 | * | ||
221 | * Increasing the sequence after the spin_unlock would be | ||
222 | * unsafe because the kvm page fault could then establish the | ||
223 | * pte after kvm_unmap_hva returned, without noticing the page | ||
224 | * is going to be freed. | ||
225 | */ | ||
226 | spin_lock(&kvm->mmu_lock); | ||
227 | kvm->mmu_notifier_seq++; | ||
228 | need_tlb_flush = kvm_unmap_hva(kvm, address); | ||
229 | spin_unlock(&kvm->mmu_lock); | ||
230 | |||
231 | /* we have to flush the tlb before the pages can be freed */ | ||
232 | if (need_tlb_flush) | ||
233 | kvm_flush_remote_tlbs(kvm); | ||
234 | |||
235 | } | ||
236 | |||
/*
 * mmu notifier ->invalidate_range_start callback: under kvm->mmu_lock,
 * increment kvm->mmu_notifier_count and call kvm_unmap_hva() once per
 * PAGE_SIZE step for addresses in [start, end). If any call returned
 * nonzero, flush remote TLBs after the lock is dropped. The count is
 * decremented again in kvm_mmu_notifier_invalidate_range_end(). The mm
 * argument is not used in this function.
 */
237 | static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | ||
238 | struct mm_struct *mm, | ||
239 | unsigned long start, | ||
240 | unsigned long end) | ||
241 | { | ||
242 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | ||
243 | int need_tlb_flush = 0; | ||
244 | |||
245 | spin_lock(&kvm->mmu_lock); | ||
246 | /* | ||
247 | * The count increase must become visible at unlock time as no | ||
248 | * spte can be established without taking the mmu_lock and | ||
249 | * count is also read inside the mmu_lock critical section. | ||
250 | */ | ||
251 | kvm->mmu_notifier_count++; | ||
252 | for (; start < end; start += PAGE_SIZE) | ||
253 | need_tlb_flush |= kvm_unmap_hva(kvm, start); | ||
254 | spin_unlock(&kvm->mmu_lock); | ||
255 | |||
256 | /* we have to flush the tlb before the pages can be freed */ | ||
257 | if (need_tlb_flush) | ||
258 | kvm_flush_remote_tlbs(kvm); | ||
259 | } | ||
260 | |||
/*
 * mmu notifier ->invalidate_range_end callback: under kvm->mmu_lock, bump
 * kvm->mmu_notifier_seq and then decrement kvm->mmu_notifier_count (which
 * kvm_mmu_notifier_invalidate_range_start() incremented). The mm, start and
 * end arguments are not used in this function.
 */
261 | static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, | ||
262 | struct mm_struct *mm, | ||
263 | unsigned long start, | ||
264 | unsigned long end) | ||
265 | { | ||
266 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | ||
267 | |||
268 | spin_lock(&kvm->mmu_lock); | ||
269 | /* | ||
270 | * This sequence increase will notify the kvm page fault that | ||
271 | * the page that is going to be mapped in the spte could have | ||
272 | * been freed. | ||
273 | */ | ||
274 | kvm->mmu_notifier_seq++; | ||
275 | /* | ||
276 | * The above sequence increase must be visible before the | ||
277 | * below count decrease but both values are read by the kvm | ||
278 | * page fault under mmu_lock spinlock so we don't need to add | ||
279 | * a smp_wmb() here in between the two. | ||
280 | */ | ||
281 | kvm->mmu_notifier_count--; | ||
282 | spin_unlock(&kvm->mmu_lock); | ||
283 | |||
/* NOTE(review): this check reads mmu_notifier_count after mmu_lock has been dropped; confirm an unlocked read is intended. */
284 | BUG_ON(kvm->mmu_notifier_count < 0); | ||
285 | } | ||
286 | |||
/*
 * mmu notifier ->clear_flush_young callback: call kvm_age_hva() for the
 * given host virtual address under kvm->mmu_lock; if it returned nonzero,
 * flush remote TLBs after the lock is dropped. Returns the kvm_age_hva()
 * result unchanged. The mm argument is not used in this function.
 */
287 | static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | ||
288 | struct mm_struct *mm, | ||
289 | unsigned long address) | ||
290 | { | ||
291 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | ||
292 | int young; | ||
293 | |||
294 | spin_lock(&kvm->mmu_lock); | ||
295 | young = kvm_age_hva(kvm, address); | ||
296 | spin_unlock(&kvm->mmu_lock); | ||
297 | |||
298 | if (young) | ||
299 | kvm_flush_remote_tlbs(kvm); | ||
300 | |||
301 | return young; | ||
302 | } | ||
303 | |||
/*
 * Callback table for KVM's mmu notifier; kvm_create_vm() stores its address
 * in kvm->mmu_notifier.ops before calling mmu_notifier_register().
 */
304 | static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { | ||
305 | .invalidate_page = kvm_mmu_notifier_invalidate_page, | ||
306 | .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, | ||
307 | .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, | ||
308 | .clear_flush_young = kvm_mmu_notifier_clear_flush_young, | ||
309 | }; | ||
310 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ | ||
311 | |||
195 | static struct kvm *kvm_create_vm(void) | 312 | static struct kvm *kvm_create_vm(void) |
196 | { | 313 | { |
197 | struct kvm *kvm = kvm_arch_create_vm(); | 314 | struct kvm *kvm = kvm_arch_create_vm(); |
@@ -212,6 +329,21 @@ static struct kvm *kvm_create_vm(void) | |||
212 | (struct kvm_coalesced_mmio_ring *)page_address(page); | 329 | (struct kvm_coalesced_mmio_ring *)page_address(page); |
213 | #endif | 330 | #endif |
214 | 331 | ||
332 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | ||
333 | { | ||
334 | int err; | ||
335 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; | ||
336 | err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); | ||
337 | if (err) { | ||
338 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | ||
339 | put_page(page); | ||
340 | #endif | ||
341 | kfree(kvm); | ||
342 | return ERR_PTR(err); | ||
343 | } | ||
344 | } | ||
345 | #endif | ||
346 | |||
215 | kvm->mm = current->mm; | 347 | kvm->mm = current->mm; |
216 | atomic_inc(&kvm->mm->mm_count); | 348 | atomic_inc(&kvm->mm->mm_count); |
217 | spin_lock_init(&kvm->mmu_lock); | 349 | spin_lock_init(&kvm->mmu_lock); |
@@ -272,6 +404,9 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
272 | if (kvm->coalesced_mmio_ring != NULL) | 404 | if (kvm->coalesced_mmio_ring != NULL) |
273 | free_page((unsigned long)kvm->coalesced_mmio_ring); | 405 | free_page((unsigned long)kvm->coalesced_mmio_ring); |
274 | #endif | 406 | #endif |
407 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | ||
408 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); | ||
409 | #endif | ||
275 | kvm_arch_destroy_vm(kvm); | 410 | kvm_arch_destroy_vm(kvm); |
276 | mmdrop(mm); | 411 | mmdrop(mm); |
277 | } | 412 | } |