author		Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>	2011-07-11 15:30:35 -0400
committer	Avi Kivity <avi@redhat.com>			2011-07-24 04:50:36 -0400
commit		1df9f2dc39948c3cb900725b7f0754fb385c8354 (patch)
tree		53731b23b8e2e95ce8d9f943acd9e0d83db4e7da /arch/x86
parent		d7c55201e66e9f702db575c9dfc2d34a7af6cf1f (diff)
KVM: MMU: introduce the rules to modify shadow page table
Introduce some interfaces to modify the spte, following the same pattern
the Linux kernel uses for its ptes:

- mmu_spte_clear_track_bits: sets the spte from present to nonpresent,
  and tracks the state bits (accessed/dirty) of the spte
- mmu_spte_clear_no_track: the same as mmu_spte_clear_track_bits, except
  that it does not track the state bits
- mmu_spte_set: sets the spte from nonpresent to present
- mmu_spte_update: only updates the state bits

Setting a spte from present to present is no longer allowed. Later, this
will let us drop the atomic operation on X86_32 hosts, and it is the
preparatory work for reading the spte on X86_32 hosts outside of the
mmu lock.
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
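
To make the four rules concrete, here is a minimal user-space sketch of the
state machine they enforce. It is only an illustration, not code from this
patch: the bit positions, the sketch_* names, and the use of assert() in
place of WARN_ON() are invented, and real sptes also carry a pfn and
permission bits that are elided here.

#include <assert.h>
#include <stdint.h>

#define SPTE_PRESENT  (1ull << 0)   /* invented bit layout */
#define SPTE_ACCESSED (1ull << 5)
#define SPTE_DIRTY    (1ull << 6)

static int is_present(uint64_t spte)
{
	return (spte & SPTE_PRESENT) != 0;
}

/* Rule: nonpresent -> present only. */
static void sketch_spte_set(uint64_t *sptep, uint64_t new_spte)
{
	assert(!is_present(*sptep));    /* stands in for WARN_ON() */
	*sptep = new_spte;
}

/* Rule: present -> present with the same pfn; keep accessed/dirty. */
static void sketch_spte_update(uint64_t *sptep, uint64_t new_spte)
{
	uint64_t old_spte = *sptep;

	if (!is_present(old_spte)) {
		sketch_spte_set(sptep, new_spte);
		return;
	}
	*sptep = new_spte | (old_spte & (SPTE_ACCESSED | SPTE_DIRTY));
}

/* Rule: present -> nonpresent; report the old state bits so the
 * caller can propagate accessed/dirty to the backing page. */
static int sketch_spte_clear_track_bits(uint64_t *sptep, int *accessed, int *dirty)
{
	uint64_t old_spte = *sptep;

	*sptep = 0;
	if (!is_present(old_spte))
		return 0;
	*accessed = !!(old_spte & SPTE_ACCESSED);
	*dirty = !!(old_spte & SPTE_DIRTY);
	return 1;
}

/* Rule: clear without inspecting the state bits (upper-level entries). */
static void sketch_spte_clear_no_track(uint64_t *sptep)
{
	*sptep = 0;
}

int main(void)
{
	uint64_t spte = 0;
	int accessed, dirty;

	sketch_spte_set(&spte, SPTE_PRESENT | SPTE_ACCESSED);
	sketch_spte_update(&spte, SPTE_PRESENT);      /* accessed survives */
	assert(spte & SPTE_ACCESSED);
	assert(sketch_spte_clear_track_bits(&spte, &accessed, &dirty) == 1);
	assert(accessed && !dirty);
	sketch_spte_clear_no_track(&spte);            /* no tracking needed */
	return 0;
}

The key invariant is that every present-to-present change goes through
mmu_spte_update (pfn unchanged, state bits folded in), while a pfn change
must pass through nonpresent, as kvm_set_pte_rmapp now does with a
clear_track_bits/set pair in the diff below.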
Diffstat (limited to 'arch/x86')
 arch/x86/kvm/mmu.c | 103 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 69 insertions(+), 34 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1d4a2d9cc718..982718fe12a7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -299,12 +299,30 @@ static bool spte_is_bit_cleared(u64 old_spte, u64 new_spte, u64 bit_mask)
 	return (old_spte & bit_mask) && !(new_spte & bit_mask);
 }
 
-static void update_spte(u64 *sptep, u64 new_spte)
+/* Rules for using mmu_spte_set:
+ * Set the sptep from nonpresent to present.
+ * Note: the sptep being assigned *must* be either not present
+ * or in a state where the hardware will not attempt to update
+ * the spte.
+ */
+static void mmu_spte_set(u64 *sptep, u64 new_spte)
+{
+	WARN_ON(is_shadow_present_pte(*sptep));
+	__set_spte(sptep, new_spte);
+}
+
+/* Rules for using mmu_spte_update:
+ * Update the state bits, it means the mapped pfn is not changed.
+ */
+static void mmu_spte_update(u64 *sptep, u64 new_spte)
 {
 	u64 mask, old_spte = *sptep;
 
 	WARN_ON(!is_rmap_spte(new_spte));
 
+	if (!is_shadow_present_pte(old_spte))
+		return mmu_spte_set(sptep, new_spte);
+
 	new_spte |= old_spte & shadow_dirty_mask;
 
 	mask = shadow_accessed_mask;
@@ -325,6 +343,42 @@ static void update_spte(u64 *sptep, u64 new_spte)
 		kvm_set_pfn_dirty(spte_to_pfn(old_spte));
 }
 
+/*
+ * Rules for using mmu_spte_clear_track_bits:
+ * It sets the sptep from present to nonpresent, and track the
+ * state bits, it is used to clear the last level sptep.
+ */
+static int mmu_spte_clear_track_bits(u64 *sptep)
+{
+	pfn_t pfn;
+	u64 old_spte = *sptep;
+
+	if (!spte_has_volatile_bits(old_spte))
+		__set_spte(sptep, 0ull);
+	else
+		old_spte = __xchg_spte(sptep, 0ull);
+
+	if (!is_rmap_spte(old_spte))
+		return 0;
+
+	pfn = spte_to_pfn(old_spte);
+	if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
+		kvm_set_pfn_accessed(pfn);
+	if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
+		kvm_set_pfn_dirty(pfn);
+	return 1;
+}
+
+/*
+ * Rules for using mmu_spte_clear_no_track:
+ * Directly clear spte without caring the state bits of sptep,
+ * it is used to set the upper level spte.
+ */
+static void mmu_spte_clear_no_track(u64 *sptep)
+{
+	__set_spte(sptep, 0ull);
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  struct kmem_cache *base_cache, int min)
 {
@@ -746,30 +800,9 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	pte_list_remove(spte, rmapp);
 }
 
-static int set_spte_track_bits(u64 *sptep, u64 new_spte)
-{
-	pfn_t pfn;
-	u64 old_spte = *sptep;
-
-	if (!spte_has_volatile_bits(old_spte))
-		__set_spte(sptep, new_spte);
-	else
-		old_spte = __xchg_spte(sptep, new_spte);
-
-	if (!is_rmap_spte(old_spte))
-		return 0;
-
-	pfn = spte_to_pfn(old_spte);
-	if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
-		kvm_set_pfn_accessed(pfn);
-	if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
-		kvm_set_pfn_dirty(pfn);
-	return 1;
-}
-
 static void drop_spte(struct kvm *kvm, u64 *sptep)
 {
-	if (set_spte_track_bits(sptep, 0ull))
+	if (mmu_spte_clear_track_bits(sptep))
 		rmap_remove(kvm, sptep);
 }
 
@@ -787,7 +820,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 		BUG_ON(!(*spte & PT_PRESENT_MASK));
 		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
 		if (is_writable_pte(*spte)) {
-			update_spte(spte, *spte & ~PT_WRITABLE_MASK);
+			mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK);
 			write_protected = 1;
 		}
 		spte = rmap_next(kvm, rmapp, spte);
@@ -856,7 +889,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			new_spte &= ~PT_WRITABLE_MASK;
 			new_spte &= ~SPTE_HOST_WRITEABLE;
 			new_spte &= ~shadow_accessed_mask;
-			set_spte_track_bits(spte, new_spte);
+			mmu_spte_clear_track_bits(spte);
+			mmu_spte_set(spte, new_spte);
 			spte = rmap_next(kvm, rmapp, spte);
 		}
 	}
@@ -1077,7 +1111,7 @@ static void drop_parent_pte(struct kvm_mmu_page *sp,
 			    u64 *parent_pte)
 {
 	mmu_page_remove_parent_pte(sp, parent_pte);
-	__set_spte(parent_pte, 0ull);
+	mmu_spte_clear_no_track(parent_pte);
 }
 
 static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
@@ -1525,7 +1559,7 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
 	spte = __pa(sp->spt)
 		| PT_PRESENT_MASK | PT_ACCESSED_MASK
 		| PT_WRITABLE_MASK | PT_USER_MASK;
-	__set_spte(sptep, spte);
+	mmu_spte_set(sptep, spte);
 }
 
 static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
@@ -1992,7 +2026,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		mark_page_dirty(vcpu->kvm, gfn);
 
 set_pte:
-	update_spte(sptep, spte);
+	mmu_spte_update(sptep, spte);
 	/*
 	 * If we overwrite a writable spte with a read-only one we
 	 * should flush remote TLBs. Otherwise rmap_write_protect
@@ -2198,11 +2232,11 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			return -ENOMEM;
 		}
 
-		__set_spte(iterator.sptep,
-			   __pa(sp->spt)
-			   | PT_PRESENT_MASK | PT_WRITABLE_MASK
-			   | shadow_user_mask | shadow_x_mask
-			   | shadow_accessed_mask);
+		mmu_spte_set(iterator.sptep,
+			     __pa(sp->spt)
+			     | PT_PRESENT_MASK | PT_WRITABLE_MASK
+			     | shadow_user_mask | shadow_x_mask
+			     | shadow_accessed_mask);
 		}
 	}
 	return emulate;
@@ -3439,7 +3473,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 
 			/* avoid RMW */
 			if (is_writable_pte(pt[i]))
-				update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK);
+				mmu_spte_update(&pt[i],
+						pt[i] & ~PT_WRITABLE_MASK);
 		}
 	}
 	kvm_flush_remote_tlbs(kvm);
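
A note on the X86_32 remark in the changelog: on a PAE host, a 64-bit spte
cannot be written with a single 32-bit store, so changing a live spte in
place needs an atomic 64-bit exchange. Once a present spte is never
rewritten in place, a nonpresent-to-present transition can be done with two
plain stores. The sketch below illustrates the follow-up idea only; it is
not code from this patch, and the union layout, the names, and the
placement of the present bit are assumptions.

#include <stdint.h>

/* A 64-bit spte viewed as the two 32-bit halves a 32-bit host must
 * write separately; assume the present bit lives in the low word. */
union split_spte {
	struct {
		uint32_t spte_low;	/* holds the present bit */
		uint32_t spte_high;
	} w;
	uint64_t spte;
};

/* nonpresent -> present: fill in the high word while the entry is
 * still nonpresent, then publish the low word.  The hardware page
 * walker can never observe a torn *present* entry, so no locked
 * 64-bit exchange is needed for this transition. */
void split_spte_set(volatile union split_spte *sptep, uint64_t new_spte)
{
	union split_spte nspte = { .spte = new_spte };

	sptep->w.spte_high = nspte.w.spte_high;
	/* a write barrier (smp_wmb()) belongs here in real code */
	sptep->w.spte_low = nspte.w.spte_low;
}

A present-to-present rewrite, by contrast, could be observed half old and
half new between the two stores, which is exactly the case the new rules
forbid.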