diff options
author | Russell King <rmk@dyn-67.arm.linux.org.uk> | 2008-08-07 04:55:03 -0400 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2008-08-07 04:55:03 -0400 |
commit | 4fb8af10d0fd09372d52966b76922b9e82bbc950 (patch) | |
tree | d240e4d40357583e3f3eb228dccf20122a5b31ed /virt/kvm/kvm_main.c | |
parent | f44f82e8a20b98558486eb14497b2f71c78fa325 (diff) | |
parent | 64a99d2a8c3ed5c4e39f3ae1cc682aa8fd3977fc (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/sam/kbuild-fixes
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 155 |
1 files changed, 151 insertions, 4 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a845890b6800..7dd9b0b85e4e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -192,6 +192,123 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
192 | } | 192 | } |
193 | EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); | 193 | EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); |
194 | 194 | ||
195 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | ||
196 | static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) | ||
197 | { | ||
198 | return container_of(mn, struct kvm, mmu_notifier); | ||
199 | } | ||
200 | |||
201 | static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | ||
202 | struct mm_struct *mm, | ||
203 | unsigned long address) | ||
204 | { | ||
205 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | ||
206 | int need_tlb_flush; | ||
207 | |||
208 | /* | ||
209 | * When ->invalidate_page runs, the linux pte has been zapped | ||
210 | * already but the page is still allocated until | ||
211 | * ->invalidate_page returns. So if we increase the sequence | ||
212 | * here the kvm page fault will notice if the spte can't be | ||
213 | * established because the page is going to be freed. If | ||
214 | * instead the kvm page fault establishes the spte before | ||
215 | * ->invalidate_page runs, kvm_unmap_hva will release it | ||
216 | * before returning. | ||
217 | * | ||
218 | * The sequence increase only needs to be seen at spin_unlock | |
219 | * time, and not at spin_lock time. | ||
220 | * | ||
221 | * Increasing the sequence after the spin_unlock would be | ||
222 | * unsafe because the kvm page fault could then establish the | ||
223 | * pte after kvm_unmap_hva returned, without noticing the page | ||
224 | * is going to be freed. | ||
225 | */ | ||
226 | spin_lock(&kvm->mmu_lock); | ||
227 | kvm->mmu_notifier_seq++; | ||
228 | need_tlb_flush = kvm_unmap_hva(kvm, address); | ||
229 | spin_unlock(&kvm->mmu_lock); | ||
230 | |||
231 | /* we have to flush the tlb before the pages can be freed */ | |
232 | if (need_tlb_flush) | ||
233 | kvm_flush_remote_tlbs(kvm); | ||
234 | |||
235 | } | ||
236 | |||
237 | static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | ||
238 | struct mm_struct *mm, | ||
239 | unsigned long start, | ||
240 | unsigned long end) | ||
241 | { | ||
242 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | ||
243 | int need_tlb_flush = 0; | ||
244 | |||
245 | spin_lock(&kvm->mmu_lock); | ||
246 | /* | ||
247 | * The count increase must become visible at unlock time as no | ||
248 | * spte can be established without taking the mmu_lock and | ||
249 | * count is also read inside the mmu_lock critical section. | ||
250 | */ | ||
251 | kvm->mmu_notifier_count++; | ||
252 | for (; start < end; start += PAGE_SIZE) | ||
253 | need_tlb_flush |= kvm_unmap_hva(kvm, start); | ||
254 | spin_unlock(&kvm->mmu_lock); | ||
255 | |||
256 | /* we have to flush the tlb before the pages can be freed */ | |
257 | if (need_tlb_flush) | ||
258 | kvm_flush_remote_tlbs(kvm); | ||
259 | } | ||
260 | |||
261 | static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, | ||
262 | struct mm_struct *mm, | ||
263 | unsigned long start, | ||
264 | unsigned long end) | ||
265 | { | ||
266 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | ||
267 | |||
268 | spin_lock(&kvm->mmu_lock); | ||
269 | /* | ||
270 | * This sequence increase will notify the kvm page fault that | ||
271 | * the page that is going to be mapped in the spte could have | ||
272 | * been freed. | ||
273 | */ | ||
274 | kvm->mmu_notifier_seq++; | ||
275 | /* | ||
276 | * The above sequence increase must be visible before the | ||
277 | * below count decrease but both values are read by the kvm | ||
278 | * page fault under mmu_lock spinlock so we don't need to add | ||
279 | * a smp_wmb() here in between the two. | |
280 | */ | ||
281 | kvm->mmu_notifier_count--; | ||
282 | spin_unlock(&kvm->mmu_lock); | ||
283 | |||
284 | BUG_ON(kvm->mmu_notifier_count < 0); | ||
285 | } | ||
286 | |||
287 | static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | ||
288 | struct mm_struct *mm, | ||
289 | unsigned long address) | ||
290 | { | ||
291 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | ||
292 | int young; | ||
293 | |||
294 | spin_lock(&kvm->mmu_lock); | ||
295 | young = kvm_age_hva(kvm, address); | ||
296 | spin_unlock(&kvm->mmu_lock); | ||
297 | |||
298 | if (young) | ||
299 | kvm_flush_remote_tlbs(kvm); | ||
300 | |||
301 | return young; | ||
302 | } | ||
303 | |||
304 | static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { | ||
305 | .invalidate_page = kvm_mmu_notifier_invalidate_page, | ||
306 | .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, | ||
307 | .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, | ||
308 | .clear_flush_young = kvm_mmu_notifier_clear_flush_young, | ||
309 | }; | ||
310 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ | ||
311 | |||
195 | static struct kvm *kvm_create_vm(void) | 312 | static struct kvm *kvm_create_vm(void) |
196 | { | 313 | { |
197 | struct kvm *kvm = kvm_arch_create_vm(); | 314 | struct kvm *kvm = kvm_arch_create_vm(); |
@@ -212,6 +329,21 @@ static struct kvm *kvm_create_vm(void) | |||
212 | (struct kvm_coalesced_mmio_ring *)page_address(page); | 329 | (struct kvm_coalesced_mmio_ring *)page_address(page); |
213 | #endif | 330 | #endif |
214 | 331 | ||
332 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | ||
333 | { | ||
334 | int err; | ||
335 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; | ||
336 | err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); | ||
337 | if (err) { | ||
338 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | ||
339 | put_page(page); | ||
340 | #endif | ||
341 | kfree(kvm); | ||
342 | return ERR_PTR(err); | ||
343 | } | ||
344 | } | ||
345 | #endif | ||
346 | |||
215 | kvm->mm = current->mm; | 347 | kvm->mm = current->mm; |
216 | atomic_inc(&kvm->mm->mm_count); | 348 | atomic_inc(&kvm->mm->mm_count); |
217 | spin_lock_init(&kvm->mmu_lock); | 349 | spin_lock_init(&kvm->mmu_lock); |
@@ -272,6 +404,9 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
272 | if (kvm->coalesced_mmio_ring != NULL) | 404 | if (kvm->coalesced_mmio_ring != NULL) |
273 | free_page((unsigned long)kvm->coalesced_mmio_ring); | 405 | free_page((unsigned long)kvm->coalesced_mmio_ring); |
274 | #endif | 406 | #endif |
407 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | ||
408 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); | ||
409 | #endif | ||
275 | kvm_arch_destroy_vm(kvm); | 410 | kvm_arch_destroy_vm(kvm); |
276 | mmdrop(mm); | 411 | mmdrop(mm); |
277 | } | 412 | } |
@@ -375,7 +510,15 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
375 | memset(new.rmap, 0, npages * sizeof(*new.rmap)); | 510 | memset(new.rmap, 0, npages * sizeof(*new.rmap)); |
376 | 511 | ||
377 | new.user_alloc = user_alloc; | 512 | new.user_alloc = user_alloc; |
378 | new.userspace_addr = mem->userspace_addr; | 513 | /* |
514 | * hva_to_rmmap() serializes with the mmu_lock and to be | |
515 | * safe it has to ignore memslots with !user_alloc && | ||
516 | * !userspace_addr. | ||
517 | */ | ||
518 | if (user_alloc) | ||
519 | new.userspace_addr = mem->userspace_addr; | ||
520 | else | ||
521 | new.userspace_addr = 0; | ||
379 | } | 522 | } |
380 | if (npages && !new.lpage_info) { | 523 | if (npages && !new.lpage_info) { |
381 | int largepages = npages / KVM_PAGES_PER_HPAGE; | 524 | int largepages = npages / KVM_PAGES_PER_HPAGE; |
@@ -408,17 +551,21 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
408 | } | 551 | } |
409 | #endif /* not defined CONFIG_S390 */ | 552 | #endif /* not defined CONFIG_S390 */ |
410 | 553 | ||
411 | if (mem->slot >= kvm->nmemslots) | ||
412 | kvm->nmemslots = mem->slot + 1; | ||
413 | |||
414 | if (!npages) | 554 | if (!npages) |
415 | kvm_arch_flush_shadow(kvm); | 555 | kvm_arch_flush_shadow(kvm); |
416 | 556 | ||
557 | spin_lock(&kvm->mmu_lock); | ||
558 | if (mem->slot >= kvm->nmemslots) | ||
559 | kvm->nmemslots = mem->slot + 1; | ||
560 | |||
417 | *memslot = new; | 561 | *memslot = new; |
562 | spin_unlock(&kvm->mmu_lock); | ||
418 | 563 | ||
419 | r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); | 564 | r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); |
420 | if (r) { | 565 | if (r) { |
566 | spin_lock(&kvm->mmu_lock); | ||
421 | *memslot = old; | 567 | *memslot = old; |
568 | spin_unlock(&kvm->mmu_lock); | ||
422 | goto out_free; | 569 | goto out_free; |
423 | } | 570 | } |
424 | 571 | ||