author		Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>	2013-05-30 20:36:22 -0400
committer	Gleb Natapov <gleb@redhat.com>				2013-06-05 05:32:33 -0400
commit		5304b8d37c2a5ebca48330f5e7868d240eafbed1 (patch)
tree		ca8df12267cf394c2cb871b7c3ebd742902a55fb
parent		a2ae162265e88bf5490ce54fd5f2d430d6d992b7 (diff)
KVM: MMU: fast invalidate all pages
The current kvm_mmu_zap_all is really slow: it holds mmu-lock while walking and zapping all shadow pages one by one, and it also needs to zap every guest page's rmap and every shadow page's parent-spte list. Things get worse as the guest uses more memory or more vcpus, which is bad for scalability.

This patch introduces a faster way to invalidate all shadow pages. KVM maintains a global MMU-valid generation number, stored in kvm->arch.mmu_valid_gen, and every shadow page stores the current global generation number in sp->mmu_valid_gen when it is created.

When KVM needs to zap all shadow pages' sptes, it simply increases the global generation number and then reloads the root shadow pages on all vcpus. Each vcpu will then build a new shadow page table according to the current generation number, which ensures the old pages are never used again. The obsolete pages (those with sp->mmu_valid_gen != kvm->arch.mmu_valid_gen) are then zapped using a lock-break technique.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
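[Editor's note] The core idea above compresses to very little code. Here is a minimal, self-contained C sketch of the generation-number scheme; all types and names in it (struct mmu, struct shadow_page, page_create, is_obsolete) are hypothetical stand-ins for illustration, not KVM's real API. Every page is stamped with the global generation at creation, so bumping the generation obsoletes all existing pages in O(1) and defers the actual teardown.

#include <stdbool.h>
#include <stdio.h>

struct mmu {
	unsigned long mmu_valid_gen;	/* global generation number */
};

struct shadow_page {
	unsigned long mmu_valid_gen;	/* generation at creation time */
};

static void page_create(struct mmu *m, struct shadow_page *sp)
{
	/* Stamp the page with the current global generation. */
	sp->mmu_valid_gen = m->mmu_valid_gen;
}

static bool is_obsolete(struct mmu *m, struct shadow_page *sp)
{
	return sp->mmu_valid_gen != m->mmu_valid_gen;
}

int main(void)
{
	struct mmu m = { .mmu_valid_gen = 0 };
	struct shadow_page sp;

	page_create(&m, &sp);
	printf("obsolete: %d\n", is_obsolete(&m, &sp));	/* 0: still valid */

	m.mmu_valid_gen++;	/* "invalidate all pages" in one step */
	printf("obsolete: %d\n", is_obsolete(&m, &sp));	/* 1: now obsolete */
	return 0;
}

The expensive part — actually freeing the obsolete pages — is what the lock-break walk in kvm_zap_obsolete_pages() in the diff below handles incrementally.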
-rw-r--r--	arch/x86/include/asm/kvm_host.h	 2
-rw-r--r--	arch/x86/kvm/mmu.c		90
-rw-r--r--	arch/x86/kvm/mmu.h		 1
3 files changed, 93 insertions, 0 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3741c653767c..bff7d464a6ae 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -222,6 +222,7 @@ struct kvm_mmu_page {
 	int root_count;          /* Currently serving as active root */
 	unsigned int unsync_children;
 	unsigned long parent_ptes;	/* Reverse mapping for parent_pte */
+	unsigned long mmu_valid_gen;
 	DECLARE_BITMAP(unsync_child_bitmap, 512);
 
 #ifdef CONFIG_X86_32
@@ -529,6 +530,7 @@ struct kvm_arch {
 	unsigned int n_requested_mmu_pages;
 	unsigned int n_max_mmu_pages;
 	unsigned int indirect_shadow_pages;
+	unsigned long mmu_valid_gen;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	/*
 	 * Hash table of struct kvm_mmu_page.
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f8ca2f351395..d71bf8fcccf8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1511,6 +1511,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	if (!direct)
 		sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+
+	/*
+	 * The active_mmu_pages list is a FIFO list: do not move a page
+	 * until it is zapped. kvm_zap_obsolete_pages() depends on this
+	 * invariant; see the comments there.
+	 */
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
 	sp->parent_ptes = 0;
 	mmu_page_add_parent_pte(vcpu, sp, parent_pte);
@@ -1838,6 +1844,11 @@ static void clear_sp_write_flooding_count(u64 *spte)
 	__clear_sp_write_flooding_count(sp);
 }
 
+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -1900,6 +1911,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 
 		account_shadowed(vcpu->kvm, gfn);
 	}
+	sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
 	init_shadow_page_table(sp);
 	trace_kvm_mmu_get_page(sp, true);
 	return sp;
@@ -2070,8 +2082,10 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	ret = mmu_zap_unsync_children(kvm, sp, invalid_list);
 	kvm_mmu_page_unlink_children(kvm, sp);
 	kvm_mmu_unlink_parents(kvm, sp);
+
 	if (!sp->role.invalid && !sp->role.direct)
 		unaccount_shadowed(kvm, sp->gfn);
+
 	if (sp->unsync)
 		kvm_unlink_unsync_page(kvm, sp);
 	if (!sp->root_count) {
@@ -4195,6 +4209,82 @@ restart:
 	spin_unlock(&kvm->mmu_lock);
 }
 
+static void kvm_zap_obsolete_pages(struct kvm *kvm)
+{
+	struct kvm_mmu_page *sp, *node;
+	LIST_HEAD(invalid_list);
+
+restart:
+	list_for_each_entry_safe_reverse(sp, node,
+	      &kvm->arch.active_mmu_pages, link) {
+		/*
+		 * No obsolete page can precede a newly created page,
+		 * since active_mmu_pages is a FIFO list.
+		 */
+		if (!is_obsolete_sp(kvm, sp))
+			break;
+
+		/*
+		 * Do not repeatedly zap a root page to avoid an unnecessary
+		 * KVM_REQ_MMU_RELOAD; otherwise we may be unable to make
+		 * progress:
+		 *    vcpu 0                        vcpu 1
+		 *                         call vcpu_enter_guest():
+		 *                            1): handle KVM_REQ_MMU_RELOAD
+		 *                                and require mmu-lock to
+		 *                                load mmu
+		 * repeat:
+		 *    1): zap root page and
+		 *        send KVM_REQ_MMU_RELOAD
+		 *
+		 *    2): if (cond_resched_lock(mmu-lock))
+		 *
+		 *                            2): hold mmu-lock and load mmu
+		 *
+		 *                            3): see KVM_REQ_MMU_RELOAD bit
+		 *                                on vcpu->requests is set
+		 *                                then return 1 to call
+		 *                                vcpu_enter_guest() again.
+		 *            goto repeat;
+		 *
+		 * Since we are walking the list in reverse and invalid
+		 * pages are moved to its head, skipping invalid pages
+		 * avoids walking the list forever.
+		 */
+		if (sp->role.invalid)
+			continue;
+
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+			kvm_mmu_commit_zap_page(kvm, &invalid_list);
+			cond_resched_lock(&kvm->mmu_lock);
+			goto restart;
+		}
+
+		if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
+			goto restart;
+	}
+
+	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+}
+
+/*
+ * Fast-invalidate all shadow pages, using the lock-break technique
+ * to zap the obsolete pages.
+ *
+ * This is required when a memslot is being deleted or the VM is being
+ * destroyed; in these cases, we must ensure that the KVM MMU no longer
+ * uses any resource of the slot being deleted (or of any slot) once
+ * this function returns.
+ */
+void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
+{
+	spin_lock(&kvm->mmu_lock);
+	kvm->arch.mmu_valid_gen++;
+
+	kvm_zap_obsolete_pages(kvm);
+	spin_unlock(&kvm->mmu_lock);
+}
+
 void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
 {
 	struct kvm_mmu_page *sp, *node;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2adcbc2cac6d..922bfae77c58 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -97,4 +97,5 @@ static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
 	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
 }
 
+void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
 #endif
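
[Editor's note] The lock-break technique used by kvm_zap_obsolete_pages() can also be sketched outside the kernel with POSIX threads. Everything below (struct page, head, ZAP_BATCH, zap_obsolete) is an illustrative stand-in, not kernel code: the batch counter plays the role of the need_resched()/spin_needbreak() checks, and rescanning from the head mirrors the patch's goto restart after cond_resched_lock() drops and retakes mmu-lock.

#include <pthread.h>
#include <stdlib.h>

enum { ZAP_BATCH = 10 };	/* pages to zap per lock-hold interval */

struct page {
	struct page *next;
	int obsolete;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct page *head;	/* list of live pages, protected by lock */

/* Unlink and free every obsolete page, periodically dropping the lock. */
void zap_obsolete(void)
{
	struct page **pp, *p;
	int zapped = 0;

	pthread_mutex_lock(&lock);
restart:
	for (pp = &head; (p = *pp) != NULL; ) {
		if (!p->obsolete) {
			pp = &p->next;
			continue;
		}
		*pp = p->next;	/* unlink before freeing */
		free(p);
		if (++zapped % ZAP_BATCH == 0) {
			/*
			 * Lock break: let contending threads run, then
			 * rescan from the head, since the list may have
			 * changed while the lock was dropped.
			 */
			pthread_mutex_unlock(&lock);
			pthread_mutex_lock(&lock);
			goto restart;
		}
	}
	pthread_mutex_unlock(&lock);
}

Because each rescan is only taken after ZAP_BATCH pages have already been unlinked and freed, the walk is guaranteed to make forward progress even under heavy lock contention — the same property the FIFO ordering of active_mmu_pages gives the kernel version.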