diff options
| author | Xiao Guangrong <xiaoguangrong@cn.fujitsu.com> | 2011-07-11 15:30:35 -0400 |
|---|---|---|
| committer | Avi Kivity <avi@redhat.com> | 2011-07-24 04:50:36 -0400 |
| commit | 1df9f2dc39948c3cb900725b7f0754fb385c8354 (patch) | |
| tree | 53731b23b8e2e95ce8d9f943acd9e0d83db4e7da | |
| parent | d7c55201e66e9f702db575c9dfc2d34a7af6cf1f (diff) | |
KVM: MMU: introduce the rules to modify shadow page table
Introduce some interfaces to modify spte as linux kernel does:
- mmu_spte_clear_track_bits, it sets the spte from present to nonpresent, and
tracks the state bits (accessed/dirty) of the spte
- mmu_spte_clear_no_track, the same as mmu_spte_clear_track_bits except that
it does not track the state bits
- mmu_spte_set, set spte from nonpresent to present
- mmu_spte_update, only updates the state bits
Now, it is not allowed to set a spte from present to present; later, we can
drop the atomic operation on X86_32 hosts, and this is the preparatory work
for getting spte access on X86_32 hosts out of the mmu lock
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
| -rw-r--r-- | arch/x86/kvm/mmu.c | 103 |
1 files changed, 69 insertions, 34 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1d4a2d9cc718..982718fe12a7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -299,12 +299,30 @@ static bool spte_is_bit_cleared(u64 old_spte, u64 new_spte, u64 bit_mask) | |||
| 299 | return (old_spte & bit_mask) && !(new_spte & bit_mask); | 299 | return (old_spte & bit_mask) && !(new_spte & bit_mask); |
| 300 | } | 300 | } |
| 301 | 301 | ||
| 302 | static void update_spte(u64 *sptep, u64 new_spte) | 302 | /* Rules for using mmu_spte_set: |
| 303 | * Set the sptep from nonpresent to present. | ||
| 304 | * Note: the sptep being assigned *must* be either not present | ||
| 305 | * or in a state where the hardware will not attempt to update | ||
| 306 | * the spte. | ||
| 307 | */ | ||
| 308 | static void mmu_spte_set(u64 *sptep, u64 new_spte) | ||
| 309 | { | ||
| 310 | WARN_ON(is_shadow_present_pte(*sptep)); | ||
| 311 | __set_spte(sptep, new_spte); | ||
| 312 | } | ||
| 313 | |||
| 314 | /* Rules for using mmu_spte_update: | ||
| 315 | * Update the state bits, it means the mapped pfn is not changged. | ||
| 316 | */ | ||
| 317 | static void mmu_spte_update(u64 *sptep, u64 new_spte) | ||
| 303 | { | 318 | { |
| 304 | u64 mask, old_spte = *sptep; | 319 | u64 mask, old_spte = *sptep; |
| 305 | 320 | ||
| 306 | WARN_ON(!is_rmap_spte(new_spte)); | 321 | WARN_ON(!is_rmap_spte(new_spte)); |
| 307 | 322 | ||
| 323 | if (!is_shadow_present_pte(old_spte)) | ||
| 324 | return mmu_spte_set(sptep, new_spte); | ||
| 325 | |||
| 308 | new_spte |= old_spte & shadow_dirty_mask; | 326 | new_spte |= old_spte & shadow_dirty_mask; |
| 309 | 327 | ||
| 310 | mask = shadow_accessed_mask; | 328 | mask = shadow_accessed_mask; |
| @@ -325,6 +343,42 @@ static void update_spte(u64 *sptep, u64 new_spte) | |||
| 325 | kvm_set_pfn_dirty(spte_to_pfn(old_spte)); | 343 | kvm_set_pfn_dirty(spte_to_pfn(old_spte)); |
| 326 | } | 344 | } |
| 327 | 345 | ||
| 346 | /* | ||
| 347 | * Rules for using mmu_spte_clear_track_bits: | ||
| 348 | * It sets the sptep from present to nonpresent, and track the | ||
| 349 | * state bits, it is used to clear the last level sptep. | ||
| 350 | */ | ||
| 351 | static int mmu_spte_clear_track_bits(u64 *sptep) | ||
| 352 | { | ||
| 353 | pfn_t pfn; | ||
| 354 | u64 old_spte = *sptep; | ||
| 355 | |||
| 356 | if (!spte_has_volatile_bits(old_spte)) | ||
| 357 | __set_spte(sptep, 0ull); | ||
| 358 | else | ||
| 359 | old_spte = __xchg_spte(sptep, 0ull); | ||
| 360 | |||
| 361 | if (!is_rmap_spte(old_spte)) | ||
| 362 | return 0; | ||
| 363 | |||
| 364 | pfn = spte_to_pfn(old_spte); | ||
| 365 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) | ||
| 366 | kvm_set_pfn_accessed(pfn); | ||
| 367 | if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) | ||
| 368 | kvm_set_pfn_dirty(pfn); | ||
| 369 | return 1; | ||
| 370 | } | ||
| 371 | |||
| 372 | /* | ||
| 373 | * Rules for using mmu_spte_clear_no_track: | ||
| 374 | * Directly clear spte without caring the state bits of sptep, | ||
| 375 | * it is used to set the upper level spte. | ||
| 376 | */ | ||
| 377 | static void mmu_spte_clear_no_track(u64 *sptep) | ||
| 378 | { | ||
| 379 | __set_spte(sptep, 0ull); | ||
| 380 | } | ||
| 381 | |||
| 328 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | 382 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
| 329 | struct kmem_cache *base_cache, int min) | 383 | struct kmem_cache *base_cache, int min) |
| 330 | { | 384 | { |
| @@ -746,30 +800,9 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
| 746 | pte_list_remove(spte, rmapp); | 800 | pte_list_remove(spte, rmapp); |
| 747 | } | 801 | } |
| 748 | 802 | ||
| 749 | static int set_spte_track_bits(u64 *sptep, u64 new_spte) | ||
| 750 | { | ||
| 751 | pfn_t pfn; | ||
| 752 | u64 old_spte = *sptep; | ||
| 753 | |||
| 754 | if (!spte_has_volatile_bits(old_spte)) | ||
| 755 | __set_spte(sptep, new_spte); | ||
| 756 | else | ||
| 757 | old_spte = __xchg_spte(sptep, new_spte); | ||
| 758 | |||
| 759 | if (!is_rmap_spte(old_spte)) | ||
| 760 | return 0; | ||
| 761 | |||
| 762 | pfn = spte_to_pfn(old_spte); | ||
| 763 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) | ||
| 764 | kvm_set_pfn_accessed(pfn); | ||
| 765 | if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) | ||
| 766 | kvm_set_pfn_dirty(pfn); | ||
| 767 | return 1; | ||
| 768 | } | ||
| 769 | |||
| 770 | static void drop_spte(struct kvm *kvm, u64 *sptep) | 803 | static void drop_spte(struct kvm *kvm, u64 *sptep) |
| 771 | { | 804 | { |
| 772 | if (set_spte_track_bits(sptep, 0ull)) | 805 | if (mmu_spte_clear_track_bits(sptep)) |
| 773 | rmap_remove(kvm, sptep); | 806 | rmap_remove(kvm, sptep); |
| 774 | } | 807 | } |
| 775 | 808 | ||
| @@ -787,7 +820,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
| 787 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 820 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
| 788 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 821 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
| 789 | if (is_writable_pte(*spte)) { | 822 | if (is_writable_pte(*spte)) { |
| 790 | update_spte(spte, *spte & ~PT_WRITABLE_MASK); | 823 | mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK); |
| 791 | write_protected = 1; | 824 | write_protected = 1; |
| 792 | } | 825 | } |
| 793 | spte = rmap_next(kvm, rmapp, spte); | 826 | spte = rmap_next(kvm, rmapp, spte); |
| @@ -856,7 +889,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
| 856 | new_spte &= ~PT_WRITABLE_MASK; | 889 | new_spte &= ~PT_WRITABLE_MASK; |
| 857 | new_spte &= ~SPTE_HOST_WRITEABLE; | 890 | new_spte &= ~SPTE_HOST_WRITEABLE; |
| 858 | new_spte &= ~shadow_accessed_mask; | 891 | new_spte &= ~shadow_accessed_mask; |
| 859 | set_spte_track_bits(spte, new_spte); | 892 | mmu_spte_clear_track_bits(spte); |
| 893 | mmu_spte_set(spte, new_spte); | ||
| 860 | spte = rmap_next(kvm, rmapp, spte); | 894 | spte = rmap_next(kvm, rmapp, spte); |
| 861 | } | 895 | } |
| 862 | } | 896 | } |
| @@ -1077,7 +1111,7 @@ static void drop_parent_pte(struct kvm_mmu_page *sp, | |||
| 1077 | u64 *parent_pte) | 1111 | u64 *parent_pte) |
| 1078 | { | 1112 | { |
| 1079 | mmu_page_remove_parent_pte(sp, parent_pte); | 1113 | mmu_page_remove_parent_pte(sp, parent_pte); |
| 1080 | __set_spte(parent_pte, 0ull); | 1114 | mmu_spte_clear_no_track(parent_pte); |
| 1081 | } | 1115 | } |
| 1082 | 1116 | ||
| 1083 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | 1117 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, |
| @@ -1525,7 +1559,7 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | |||
| 1525 | spte = __pa(sp->spt) | 1559 | spte = __pa(sp->spt) |
| 1526 | | PT_PRESENT_MASK | PT_ACCESSED_MASK | 1560 | | PT_PRESENT_MASK | PT_ACCESSED_MASK |
| 1527 | | PT_WRITABLE_MASK | PT_USER_MASK; | 1561 | | PT_WRITABLE_MASK | PT_USER_MASK; |
| 1528 | __set_spte(sptep, spte); | 1562 | mmu_spte_set(sptep, spte); |
| 1529 | } | 1563 | } |
| 1530 | 1564 | ||
| 1531 | static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) | 1565 | static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) |
| @@ -1992,7 +2026,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
| 1992 | mark_page_dirty(vcpu->kvm, gfn); | 2026 | mark_page_dirty(vcpu->kvm, gfn); |
| 1993 | 2027 | ||
| 1994 | set_pte: | 2028 | set_pte: |
| 1995 | update_spte(sptep, spte); | 2029 | mmu_spte_update(sptep, spte); |
| 1996 | /* | 2030 | /* |
| 1997 | * If we overwrite a writable spte with a read-only one we | 2031 | * If we overwrite a writable spte with a read-only one we |
| 1998 | * should flush remote TLBs. Otherwise rmap_write_protect | 2032 | * should flush remote TLBs. Otherwise rmap_write_protect |
| @@ -2198,11 +2232,11 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
| 2198 | return -ENOMEM; | 2232 | return -ENOMEM; |
| 2199 | } | 2233 | } |
| 2200 | 2234 | ||
| 2201 | __set_spte(iterator.sptep, | 2235 | mmu_spte_set(iterator.sptep, |
| 2202 | __pa(sp->spt) | 2236 | __pa(sp->spt) |
| 2203 | | PT_PRESENT_MASK | PT_WRITABLE_MASK | 2237 | | PT_PRESENT_MASK | PT_WRITABLE_MASK |
| 2204 | | shadow_user_mask | shadow_x_mask | 2238 | | shadow_user_mask | shadow_x_mask |
| 2205 | | shadow_accessed_mask); | 2239 | | shadow_accessed_mask); |
| 2206 | } | 2240 | } |
| 2207 | } | 2241 | } |
| 2208 | return emulate; | 2242 | return emulate; |
| @@ -3439,7 +3473,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
| 3439 | 3473 | ||
| 3440 | /* avoid RMW */ | 3474 | /* avoid RMW */ |
| 3441 | if (is_writable_pte(pt[i])) | 3475 | if (is_writable_pte(pt[i])) |
| 3442 | update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK); | 3476 | mmu_spte_update(&pt[i], |
| 3477 | pt[i] & ~PT_WRITABLE_MASK); | ||
| 3443 | } | 3478 | } |
| 3444 | } | 3479 | } |
| 3445 | kvm_flush_remote_tlbs(kvm); | 3480 | kvm_flush_remote_tlbs(kvm); |
