path: root/arch/x86/kvm/mmu.c
author		Dave Hansen <dave@linux.vnet.ibm.com>	2010-08-19 21:11:37 -0400
committer	Avi Kivity <avi@redhat.com>	2010-10-24 04:51:19 -0400
commit		45221ab6684a82a5b60208b76d6f8bfb1bbcb969
tree		bdc915bf20cc9dfb40b81b7601ed5182c047d13a /arch/x86/kvm/mmu.c
parent		49d5ca26636cb8feb05aff92fc4dba3e494ec683
KVM: create aggregate kvm_total_used_mmu_pages value
Of slab shrinkers, the VM code says:

 * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is
 * querying the cache size, so a fastpath for that case is appropriate.

and it *means* it.  Look at how it calls the shrinkers:

    nr_before = (*shrinker->shrink)(0, gfp_mask);
    shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask);

So, if you do anything stupid in your shrinker, the VM will doubly punish you.

The mmu_shrink() function takes the global kvm_lock, then acquires every
VM's kvm->mmu_lock in sequence.  If we have 100 VMs, then we're going to
take 101 locks.  We do it twice, so each call takes 202 locks.  If we're
under memory pressure, we can have each cpu trying to do this.  It can
get really hairy, and we've seen lock spinning in mmu_shrink() be the
dominant entry in profiles.

This is guaranteed to optimize at least half of those lock acquisitions
away.  It removes the need to take any of the locks when simply trying
to count objects.

A 'percpu_counter' can be a large object, but we only have one of these
for the entire system.  There are not any better alternatives at the
moment, especially ones that handle CPU hotplug.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Tim Pepper <lnxninja@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
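The shape of the fix is the usual shrinker fastpath pattern: answer the nr_to_scan == 0 size query from a cheap aggregate counter instead of walking and locking every VM.  A minimal sketch of that pattern against the shrinker API of this era (example_shrink and total_objects are hypothetical names used for illustration, not the KVM code; the real change follows in the diff):

    /* one system-wide aggregate, bumped wherever objects are created/freed */
    static struct percpu_counter total_objects;

    static int example_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
    {
            if (nr_to_scan == 0)
                    goto out;       /* size query: take no locks at all */

            /* ... walk instances and reclaim up to nr_to_scan objects ... */

    out:
            /* an approximate, non-negative count is all the VM needs here */
            return percpu_counter_read_positive(&total_objects);
    }

percpu_counter_read_positive() reads only the batched central count, without summing the per-cpu deltas or taking any lock, so the size query stays cheap no matter how many VMs exist.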
Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--	arch/x86/kvm/mmu.c	34
1 file changed, 24 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ff39b85d7a4..33d7af50cf8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -178,6 +178,7 @@ typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte);
 static struct kmem_cache *pte_chain_cache;
 static struct kmem_cache *rmap_desc_cache;
 static struct kmem_cache *mmu_page_header_cache;
+static struct percpu_counter kvm_total_used_mmu_pages;
 
 static u64 __read_mostly shadow_trap_nonpresent_pte;
 static u64 __read_mostly shadow_notrap_nonpresent_pte;
@@ -971,6 +972,18 @@ static int is_empty_shadow_page(u64 *spt)
 }
 #endif
 
+/*
+ * This value is the sum of all of the kvm instances's
+ * kvm->arch.n_used_mmu_pages values.  We need a global,
+ * aggregate version in order to make the slab shrinker
+ * faster
+ */
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
+{
+	kvm->arch.n_used_mmu_pages += nr;
+	percpu_counter_add(&kvm_total_used_mmu_pages, nr);
+}
+
 static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	ASSERT(is_empty_shadow_page(sp->spt));
@@ -980,7 +993,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	if (!sp->role.direct)
 		__free_page(virt_to_page(sp->gfns));
 	kmem_cache_free(mmu_page_header_cache, sp);
-	--kvm->arch.n_used_mmu_pages;
+	kvm_mod_used_mmu_pages(kvm, -1);
 }
 
 static unsigned kvm_page_table_hashfn(gfn_t gfn)
@@ -1003,7 +1016,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	sp->multimapped = 0;
 	sp->parent_pte = parent_pte;
-	++vcpu->kvm->arch.n_used_mmu_pages;
+	kvm_mod_used_mmu_pages(vcpu->kvm, +1);
 	return sp;
 }
 
@@ -3122,23 +3135,22 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
 	struct kvm *kvm;
 	struct kvm *kvm_freed = NULL;
-	int cache_count = 0;
+
+	if (nr_to_scan == 0)
+		goto out;
 
 	spin_lock(&kvm_lock);
 
 	list_for_each_entry(kvm, &vm_list, vm_list) {
-		int npages, idx, freed_pages;
+		int idx, freed_pages;
 		LIST_HEAD(invalid_list);
 
 		idx = srcu_read_lock(&kvm->srcu);
 		spin_lock(&kvm->mmu_lock);
-		npages = kvm->arch.n_max_mmu_pages -
-			 kvm_mmu_available_pages(kvm);
-		cache_count += npages;
-		if (!kvm_freed && nr_to_scan > 0 && npages > 0) {
+		if (!kvm_freed && nr_to_scan > 0 &&
+		    kvm->arch.n_used_mmu_pages > 0) {
 			freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm,
 							  &invalid_list);
-			cache_count -= freed_pages;
 			kvm_freed = kvm;
 		}
 		nr_to_scan--;
@@ -3152,7 +3164,8 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 
 	spin_unlock(&kvm_lock);
 
-	return cache_count;
+out:
+	return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
 }
 
 static struct shrinker mmu_shrinker = {
@@ -3195,6 +3208,7 @@ int kvm_mmu_module_init(void)
 	if (!mmu_page_header_cache)
 		goto nomem;
 
+	percpu_counter_init(&kvm_total_used_mmu_pages, 0);
 	register_shrinker(&mmu_shrinker);
 
 	return 0;