author     Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>    2013-05-30 20:36:29 -0400
committer  Gleb Natapov <gleb@redhat.com>                       2013-06-05 05:33:33 -0400
commit     365c886860c4ba670d245e762b23987c912c129a (patch)
tree       81f59edf5ba5e4c944d5590a631bbd23419e2cd8
parent     f34d251d66ba263c077ed9d2bbd1874339a4c887 (diff)
KVM: MMU: reclaim the zapped-obsolete page first
As Marcelo pointed out:

| "(retention of large number of pages while zapping)
| can be fatal, it can lead to OOM and host crash"

We introduce a list, kvm->arch.zapped_obsolete_pages, to link all the
pages that have been deleted from the MMU cache but not yet freed.
When page reclaim is needed, we always zap these pages first (see the
sketch after the sign-offs).
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
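
The pattern in miniature: pages zapped out of the cache are parked on a
per-VM list instead of being freed on the spot, and the reclaim path
drains that list before it touches anything still live. The sketch below
is a toy model of that idea, not KVM code; struct vm, struct page,
zap_page(), commit_zapped_pages() and reclaim() are hypothetical
stand-ins for kvm_arch, kvm_mmu_page, kvm_mmu_prepare_zap_page(),
kvm_mmu_commit_zap_page() and mmu_shrink(), with all locking and SRCU
left out.

/*
 * Toy model (not KVM code) of the deferred-reclaim pattern described
 * above: zapped pages are parked on a per-VM list instead of being
 * freed immediately, and reclaim drains that list first.
 */
#include <stdio.h>
#include <stdlib.h>

struct page {
        int id;
        struct page *next;
};

struct vm {
        struct page *active_pages;          /* pages still in the cache */
        struct page *zapped_obsolete_pages; /* zapped but not yet freed */
};

/* Move one page from the active cache onto the zapped-obsolete list. */
static void zap_page(struct vm *vm)
{
        struct page *p = vm->active_pages;

        if (!p)
                return;
        vm->active_pages = p->next;
        p->next = vm->zapped_obsolete_pages;
        vm->zapped_obsolete_pages = p;
}

/* Free everything on the zapped-obsolete list; returns pages freed. */
static int commit_zapped_pages(struct vm *vm)
{
        int freed = 0;

        while (vm->zapped_obsolete_pages) {
                struct page *p = vm->zapped_obsolete_pages;

                vm->zapped_obsolete_pages = p->next;
                free(p);
                freed++;
        }
        return freed;
}

/*
 * Shrinker-like reclaim: prefer the already-zapped pages, and only zap
 * a fresh page from the active cache when that list is empty.
 */
static int reclaim(struct vm *vm)
{
        if (vm->zapped_obsolete_pages)
                return commit_zapped_pages(vm);

        zap_page(vm);                   /* zap the oldest active page... */
        return commit_zapped_pages(vm); /* ...and free it right away     */
}

int main(void)
{
        struct vm vm = { NULL, NULL };

        /* Populate a tiny "cache". */
        for (int i = 0; i < 4; i++) {
                struct page *p = malloc(sizeof(*p));

                p->id = i;
                p->next = vm.active_pages;
                vm.active_pages = p;
        }

        zap_page(&vm);          /* e.g. an invalidation zaps two pages */
        zap_page(&vm);
        printf("reclaimed %d already-zapped page(s) first\n", reclaim(&vm));
        printf("reclaimed %d more from the active cache\n", reclaim(&vm));

        /* Drain the rest for a leak-free exit. */
        while (vm.active_pages)
                zap_page(&vm);
        commit_zapped_pages(&vm);
        return 0;
}

As in the real mmu_shrink() change below, reclaim() returns as soon as
the obsolete list has been drained, so a shrink pass frees the cheap,
already-zapped pages before it zaps any live ones.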
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/mmu.c              | 21 +++++++++++++++++----
 arch/x86/kvm/x86.c              |  1 +
 3 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bff7d464a6ae..1f98c1bb5b7a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -536,6 +536,8 @@ struct kvm_arch {
          * Hash table of struct kvm_mmu_page.
          */
         struct list_head active_mmu_pages;
+        struct list_head zapped_obsolete_pages;
+
         struct list_head assigned_dev_head;
         struct iommu_domain *iommu_domain;
         int iommu_flags;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 674c0442ac89..79af88ab2f1d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4211,7 +4211,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 static void kvm_zap_obsolete_pages(struct kvm *kvm)
 {
         struct kvm_mmu_page *sp, *node;
-        LIST_HEAD(invalid_list);
         int batch = 0;
 
 restart:
@@ -4244,7 +4243,8 @@ restart:
                 goto restart;
         }
 
-        ret = kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+        ret = kvm_mmu_prepare_zap_page(kvm, sp,
+                        &kvm->arch.zapped_obsolete_pages);
         batch += ret;
 
         if (ret)
@@ -4255,7 +4255,7 @@ restart:
          * Should flush tlb before free page tables since lockless-walking
          * may use the pages.
          */
-        kvm_mmu_commit_zap_page(kvm, &invalid_list);
+        kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
 }
 
 /*
@@ -4306,6 +4306,11 @@ restart:
         spin_unlock(&kvm->mmu_lock);
 }
 
+static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
+{
+        return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
+}
+
 static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 {
         struct kvm *kvm;
@@ -4334,15 +4339,23 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
                  * want to shrink a VM that only started to populate its MMU
                  * anyway.
                  */
-                if (!kvm->arch.n_used_mmu_pages)
+                if (!kvm->arch.n_used_mmu_pages &&
+                    !kvm_has_zapped_obsolete_pages(kvm))
                         continue;
 
                 idx = srcu_read_lock(&kvm->srcu);
                 spin_lock(&kvm->mmu_lock);
 
+                if (kvm_has_zapped_obsolete_pages(kvm)) {
+                        kvm_mmu_commit_zap_page(kvm,
+                              &kvm->arch.zapped_obsolete_pages);
+                        goto unlock;
+                }
+
                 prepare_zap_oldest_mmu_page(kvm, &invalid_list);
                 kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
+unlock:
                 spin_unlock(&kvm->mmu_lock);
                 srcu_read_unlock(&kvm->srcu, idx);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 15e10f7e68ac..6402951d5f3b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6832,6 +6832,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
                 return -EINVAL;
 
         INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+        INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
         INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
         /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */