author		Andrea Arcangeli <andrea@qumranet.com>	2008-07-25 10:24:52 -0400
committer	Avi Kivity <avi@qumranet.com>	2008-07-29 05:33:53 -0400
commit		e930bffe95e1e886a1ede80726ea38df5838d067 (patch)
tree		d39227c3de8e7d4a70737c78693f6d7f458066af /virt/kvm
parent		604b38ac0369bd50fcbb33344aa5553c071009f7 (diff)
KVM: Synchronize guest physical memory map to host virtual memory map
Synchronize changes to host virtual addresses which are part of
a KVM memory slot to the KVM shadow mmu. This allows pte operations
like swapping, page migration, and madvise() to transparently work
with KVM.
Signed-off-by: Andrea Arcangeli <andrea@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
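For context on the patch below: the two fields it introduces, mmu_notifier_seq and mmu_notifier_count, are meant to be consumed by the arch page fault path when it installs an spte. A minimal sketch of that consumer side, under stated assumptions, follows; hypothetical_gup() and hypothetical_set_spte() are placeholders for arch-specific code and are not part of this patch.

/*
 * Sketch of the fault-side protocol (illustrative only): sample the
 * sequence before resolving the hva to a host page, then recheck both
 * counters under mmu_lock before establishing the spte.
 */
static int kvm_page_fault_sketch(struct kvm *kvm, unsigned long hva)
{
	unsigned long mmu_seq;
	struct page *page;

	/* Sample the sequence before the (sleepable) page lookup. */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	page = hypothetical_gup(kvm->mm, hva);	/* no locks held here */
	if (!page)
		return -EFAULT;

	spin_lock(&kvm->mmu_lock);
	/*
	 * Retry if an invalidation is in flight (count != 0) or one
	 * completed since we sampled the sequence (seq changed); the
	 * page we hold may be about to be freed.
	 */
	if (kvm->mmu_notifier_count || kvm->mmu_notifier_seq != mmu_seq) {
		spin_unlock(&kvm->mmu_lock);
		put_page(page);
		return -EAGAIN;	/* caller retries the fault */
	}
	hypothetical_set_spte(kvm, hva, page);	/* establish the spte */
	spin_unlock(&kvm->mmu_lock);

	return 0;
}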
Diffstat (limited to 'virt/kvm')
-rw-r--r--	virt/kvm/kvm_main.c	135
1 file changed, 135 insertions, 0 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3735212cd3f8..7dd9b0b85e4e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -192,6 +192,123 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
 
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
+{
+	return container_of(mn, struct kvm, mmu_notifier);
+}
+
+static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+					     struct mm_struct *mm,
+					     unsigned long address)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int need_tlb_flush;
+
+	/*
+	 * When ->invalidate_page runs, the linux pte has been zapped
+	 * already but the page is still allocated until
+	 * ->invalidate_page returns. So if we increase the sequence
+	 * here the kvm page fault will notice if the spte can't be
+	 * established because the page is going to be freed. If
+	 * instead the kvm page fault establishes the spte before
+	 * ->invalidate_page runs, kvm_unmap_hva will release it
+	 * before returning.
+	 *
+	 * The sequence increase only needs to be seen at spin_unlock
+	 * time, and not at spin_lock time.
+	 *
+	 * Increasing the sequence after the spin_unlock would be
+	 * unsafe because the kvm page fault could then establish the
+	 * pte after kvm_unmap_hva returned, without noticing the page
+	 * is going to be freed.
+	 */
+	spin_lock(&kvm->mmu_lock);
+	kvm->mmu_notifier_seq++;
+	need_tlb_flush = kvm_unmap_hva(kvm, address);
+	spin_unlock(&kvm->mmu_lock);
+
+	/* we have to flush the tlb before the pages can be freed */
+	if (need_tlb_flush)
+		kvm_flush_remote_tlbs(kvm);
+
+}
+
+static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+						    struct mm_struct *mm,
+						    unsigned long start,
+						    unsigned long end)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int need_tlb_flush = 0;
+
+	spin_lock(&kvm->mmu_lock);
+	/*
+	 * The count increase must become visible at unlock time as no
+	 * spte can be established without taking the mmu_lock and
+	 * count is also read inside the mmu_lock critical section.
+	 */
+	kvm->mmu_notifier_count++;
+	for (; start < end; start += PAGE_SIZE)
+		need_tlb_flush |= kvm_unmap_hva(kvm, start);
+	spin_unlock(&kvm->mmu_lock);
+
+	/* we have to flush the tlb before the pages can be freed */
+	if (need_tlb_flush)
+		kvm_flush_remote_tlbs(kvm);
+}
+
+static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
+						  struct mm_struct *mm,
+						  unsigned long start,
+						  unsigned long end)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+
+	spin_lock(&kvm->mmu_lock);
+	/*
+	 * This sequence increase will notify the kvm page fault that
+	 * the page that is going to be mapped in the spte could have
+	 * been freed.
+	 */
+	kvm->mmu_notifier_seq++;
+	/*
+	 * The above sequence increase must be visible before the
+	 * below count decrease but both values are read by the kvm
+	 * page fault under mmu_lock spinlock so we don't need to add
+	 * a smp_wmb() here in between the two.
+	 */
+	kvm->mmu_notifier_count--;
+	spin_unlock(&kvm->mmu_lock);
+
+	BUG_ON(kvm->mmu_notifier_count < 0);
+}
+
+static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
+					      struct mm_struct *mm,
+					      unsigned long address)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int young;
+
+	spin_lock(&kvm->mmu_lock);
+	young = kvm_age_hva(kvm, address);
+	spin_unlock(&kvm->mmu_lock);
+
+	if (young)
+		kvm_flush_remote_tlbs(kvm);
+
+	return young;
+}
+
+static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+	.invalidate_page	= kvm_mmu_notifier_invalidate_page,
+	.invalidate_range_start	= kvm_mmu_notifier_invalidate_range_start,
+	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
+	.clear_flush_young	= kvm_mmu_notifier_clear_flush_young,
+};
+#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
+
 static struct kvm *kvm_create_vm(void)
 {
 	struct kvm *kvm = kvm_arch_create_vm();
@@ -212,6 +329,21 @@ static struct kvm *kvm_create_vm(void)
 		(struct kvm_coalesced_mmio_ring *)page_address(page);
 #endif
 
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+	{
+		int err;
+		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
+		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+		if (err) {
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+			put_page(page);
+#endif
+			kfree(kvm);
+			return ERR_PTR(err);
+		}
+	}
+#endif
+
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
@@ -272,6 +404,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	if (kvm->coalesced_mmio_ring != NULL)
 		free_page((unsigned long)kvm->coalesced_mmio_ring);
 #endif
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
+#endif
 	kvm_arch_destroy_vm(kvm);
 	mmdrop(mm);
 }
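A note on the design choice, as far as the comments above explain it: ->invalidate_page covers a single pte whose page stays allocated until the callback returns, so bumping mmu_notifier_seq inside mmu_lock is enough for a racing fault to notice. Range invalidations instead span the whole window between ->invalidate_range_start and ->invalidate_range_end, during which pages may be freed at any point, so range_start raises mmu_notifier_count to hold faults off for the duration, and range_end bumps the sequence to catch any fault that sampled it before the window opened.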
