 arch/x86/include/asm/kvm_host.h |  2
 arch/x86/kvm/mmu.c              | 90
 arch/x86/kvm/mmu.h              |  1
 3 files changed, 93 insertions, 0 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3741c653767c..bff7d464a6ae 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -222,6 +222,7 @@ struct kvm_mmu_page {
 	int root_count;          /* Currently serving as active root */
 	unsigned int unsync_children;
 	unsigned long parent_ptes;	/* Reverse mapping for parent_pte */
+	unsigned long mmu_valid_gen;
 	DECLARE_BITMAP(unsync_child_bitmap, 512);
 
 #ifdef CONFIG_X86_32
@@ -529,6 +530,7 @@ struct kvm_arch {
 	unsigned int n_requested_mmu_pages;
 	unsigned int n_max_mmu_pages;
 	unsigned int indirect_shadow_pages;
+	unsigned long mmu_valid_gen;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	/*
 	 * Hash table of struct kvm_mmu_page.
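
The two new mmu_valid_gen fields above give the MMU an O(1) way to mark every existing shadow page stale: each page is stamped with the per-VM generation when it is created, and bumping kvm->arch.mmu_valid_gen makes every previously created page obsolete without touching any of them. A minimal user-space sketch of the idea follows; the struct and function names are hypothetical stand-ins, not KVM code.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for struct kvm and struct kvm_mmu_page. */
struct vm      { unsigned long mmu_valid_gen; };
struct sh_page { unsigned long mmu_valid_gen; };

/* Stamp a page with the generation that was current when it was created. */
static void page_init(struct vm *vm, struct sh_page *sp)
{
	sp->mmu_valid_gen = vm->mmu_valid_gen;
}

/* Mirrors is_obsolete_sp(): a page is stale if its stamped generation is old. */
static bool page_is_obsolete(struct vm *vm, struct sh_page *sp)
{
	return sp->mmu_valid_gen != vm->mmu_valid_gen;
}

int main(void)
{
	struct vm vm = { .mmu_valid_gen = 0 };
	struct sh_page sp;

	page_init(&vm, &sp);
	printf("obsolete before bump: %d\n", page_is_obsolete(&vm, &sp)); /* 0 */

	vm.mmu_valid_gen++;	/* the "invalidate all pages" step */
	printf("obsolete after bump:  %d\n", page_is_obsolete(&vm, &sp)); /* 1 */
	return 0;
}

The actual freeing of stale pages is deferred to kvm_zap_obsolete_pages(), added in mmu.c below.
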
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f8ca2f351395..d71bf8fcccf8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1511,6 +1511,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	if (!direct)
 		sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+
+	/*
+	 * The active_mmu_pages list is kept in FIFO order: a page is never
+	 * moved within the list until it is zapped.  kvm_zap_obsolete_pages()
+	 * relies on this ordering; see the comments there.
+	 */
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
 	sp->parent_ptes = 0;
 	mmu_page_add_parent_pte(vcpu, sp, parent_pte);
@@ -1838,6 +1844,11 @@ static void clear_sp_write_flooding_count(u64 *spte)
 	__clear_sp_write_flooding_count(sp);
 }
 
+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -1900,6 +1911,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 
 		account_shadowed(vcpu->kvm, gfn);
 	}
+	sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
 	init_shadow_page_table(sp);
 	trace_kvm_mmu_get_page(sp, true);
 	return sp;
@@ -2070,8 +2082,10 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
 	kvm_mmu_page_unlink_children(kvm, sp);
 	kvm_mmu_unlink_parents(kvm, sp);
+
 	if (!sp->role.invalid && !sp->role.direct)
 		unaccount_shadowed(kvm, sp->gfn);
+
 	if (sp->unsync)
 		kvm_unlink_unsync_page(kvm, sp);
 	if (!sp->root_count) {
@@ -4195,6 +4209,82 @@ restart:
 	spin_unlock(&kvm->mmu_lock);
 }
 
+static void kvm_zap_obsolete_pages(struct kvm *kvm)
+{
+	struct kvm_mmu_page *sp, *node;
+	LIST_HEAD(invalid_list);
+
+restart:
+	list_for_each_entry_safe_reverse(sp, node,
+	      &kvm->arch.active_mmu_pages, link) {
+		/*
+		 * No obsolete page can sit in front of a newly created one,
+		 * since active_mmu_pages is kept in FIFO order.
+		 */
+		if (!is_obsolete_sp(kvm, sp))
+			break;
+
+		/*
+		 * Do not repeatedly zap a root page to avoid unnecessary
+		 * KVM_REQ_MMU_RELOAD, otherwise we may not be able to
+		 * make progress:
+		 *    vcpu 0                        vcpu 1
+		 *                         call vcpu_enter_guest():
+		 *                            1): handle KVM_REQ_MMU_RELOAD
+		 *                                and take mmu-lock to
+		 *                                load mmu
+		 * repeat:
+		 *    1): zap root page and
+		 *        send KVM_REQ_MMU_RELOAD
+		 *
+		 *    2): if (cond_resched_lock(mmu-lock))
+		 *
+		 *                            2): hold mmu-lock and load mmu
+		 *
+		 *                            3): see that KVM_REQ_MMU_RELOAD
+		 *                                is set in vcpu->requests and
+		 *                                return 1 to call
+		 *                                vcpu_enter_guest() again.
+		 *            goto repeat;
+		 *
+		 * Since we walk the list in reverse order and invalid pages
+		 * are moved to its head, skipping them here keeps the walk
+		 * from looping forever.
+		 */
+		if (sp->role.invalid)
+			continue;
+
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+			kvm_mmu_commit_zap_page(kvm, &invalid_list);
+			cond_resched_lock(&kvm->mmu_lock);
+			goto restart;
+		}
+
+		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
+			goto restart;
+	}
+
+	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+}
+
+/*
+ * Fast invalidation of all shadow pages, using the lock-break technique
+ * to zap the obsolete pages.
+ *
+ * This is needed when a memslot is being deleted or the VM is being
+ * destroyed: in those cases the KVM MMU must no longer use any resource
+ * of the slot being deleted (or of any slot, for VM destruction) after
+ * this function returns.
+ */
+void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
+{
+	spin_lock(&kvm->mmu_lock);
+	kvm->arch.mmu_valid_gen++;
+
+	kvm_zap_obsolete_pages(kvm);
+	spin_unlock(&kvm->mmu_lock);
+}
+
 void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
 {
 	struct kvm_mmu_page *sp, *node;
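
The additions to mmu.c all rest on one invariant: kvm_mmu_alloc_page() always list_add()s new pages at the head of active_mmu_pages, so the tail holds the oldest pages, and after a generation bump every obsolete page sits behind every still-valid one. kvm_zap_obsolete_pages() therefore walks from the tail and can stop at the first valid page it meets. The sketch below models just that invariant in a runnable user-space form; it omits the lock-break and the role.invalid skip, and its types and helpers are stand-ins rather than the KVM ones.

#include <stdio.h>

/* Hypothetical models of struct kvm_mmu_page and struct kvm. */
struct page {
	unsigned long gen;
	struct page  *prev, *next;	/* stand-in for the list_head link */
};

struct vm {
	unsigned long gen;
	struct page  *head, *tail;	/* stand-in for active_mmu_pages */
};

/* list_add(): new pages always go to the head, preserving FIFO order. */
static void vm_add_page(struct vm *vm, struct page *p)
{
	p->gen  = vm->gen;
	p->prev = NULL;
	p->next = vm->head;
	if (vm->head)
		vm->head->prev = p;
	else
		vm->tail = p;
	vm->head = p;
}

/* Reverse walk: zap obsolete pages from the tail, stop at the first valid one. */
static void vm_zap_obsolete(struct vm *vm)
{
	struct page *p = vm->tail;

	while (p) {
		struct page *prev = p->prev;

		if (p->gen == vm->gen)		/* !is_obsolete_sp() -> break */
			break;
		/* unlink the tail page ("zap" it) */
		if (prev)
			prev->next = NULL;
		else
			vm->head = NULL;
		vm->tail = prev;
		printf("zapped page with gen %lu\n", p->gen);
		p = prev;
	}
}

int main(void)
{
	struct vm vm = { .gen = 0 };
	struct page a, b, c;

	vm_add_page(&vm, &a);
	vm_add_page(&vm, &b);
	vm.gen++;		/* invalidate everything created so far */
	vm_add_page(&vm, &c);	/* created after the bump: stays valid */

	vm_zap_obsolete(&vm);	/* zaps a and b, keeps c */
	return 0;
}
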
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2adcbc2cac6d..922bfae77c58 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -97,4 +97,5 @@ static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
 	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
 }
 
+void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
 #endif
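
The declaration added to mmu.h exports the fast-invalidate entry point; this patch itself adds no caller. A purely illustrative caller, with an assumed name, would look like:

/*
 * Illustrative only -- not part of this patch.  A teardown path that must
 * guarantee no shadow page still references a disappearing memslot (or any
 * slot, on VM destruction) bumps the generation via the new helper and lets
 * the lock-break zapper reclaim the obsolete pages.
 */
static void example_flush_shadow_pages(struct kvm *kvm)
{
	kvm_mmu_invalidate_zap_all_pages(kvm);
}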