author		Tejun Heo <tj@kernel.org>	2017-02-22 18:41:24 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-02-22 19:41:27 -0500
commit		510ded33e075c2bd662b1efab0110f4240325fc9 (patch)
tree		9199fa1031aac4fcf633ae89a01233a8988e23fc
parent		bc2791f857e1984b7548d2a2de2ffb1a913dee62 (diff)
slab: implement slab_root_caches list
With kmem cgroup support enabled, kmem_caches can be created and
destroyed frequently, and a great number of near-empty kmem_caches can
accumulate if there are many transient cgroups and the system is not
under memory pressure.  When memory reclaim starts under such
conditions, it can lead to consecutive deactivation and destruction of
many kmem_caches, easily hundreds of thousands on moderately large
systems, exposing scalability issues in the current slab management
code.  This is one of the patches to address the issue.

slab_caches currently lists all caches, root and memcg alike.  It is
the only data structure that lists the root caches, so iterating the
root caches can only be done by walking the list while skipping over
memcg caches.  As there can be a huge number of memcg caches, this can
become very expensive.

This can also make /proc/slabinfo behave very badly.  seq_file
processes reads in 4k chunks and, to resume after each chunk, seeks
back to the previous Nth position on the slab_caches list.  With a lot
of memcg cache churn on the list, reading /proc/slabinfo can become
very slow, and its content often ends up with duplicate and/or missing
entries.

This patch adds a new list, slab_root_caches, which lists only the
root caches.  When memcg is not enabled, it becomes just an alias of
slab_caches.  memcg-specific list operations are collected into
memcg_[un]link_cache().

Link: http://lkml.kernel.org/r/20170117235411.9408-7-tj@kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jay Vana <jsvana@fb.com>
Acked-by: Vladimir Davydov <vdavydov@tarantool.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
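[Editor's note] The /proc/slabinfo failure mode above comes from how
seq_file resumes a read: each chunk re-enters slab_start(), which calls
seq_list_start() to seek back to the saved list position.  The sketch
below is a simplified rendering of that helper (behaviorally matching
the fs/seq_file.c implementation); it shows why every resume is a
linear walk from the list head, and why churn on the list shifts which
entry a positional offset lands on.

	/*
	 * Simplified sketch of seq_list_start() from fs/seq_file.c.  Each 4k
	 * read chunk of /proc/slabinfo re-walks the list from the head up to
	 * the saved offset, so a list with N entries costs O(N) per chunk.
	 * The offset is purely positional: caches added or removed between
	 * chunks make the walk land on a different entry, which is where the
	 * duplicate and missing slabinfo lines come from.
	 */
	struct list_head *seq_list_start(struct list_head *head, loff_t pos)
	{
		struct list_head *lh;

		list_for_each(lh, head)
			if (pos-- == 0)
				return lh;

		return NULL;
	}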
-rw-r--r--	include/linux/slab.h	3
-rw-r--r--	mm/slab.h		15
-rw-r--r--	mm/slab_common.c	59
-rw-r--r--	mm/slub.c		1
4 files changed, 53 insertions(+), 25 deletions(-)
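[Editor's note] To orient the per-file diffs that follow: a walk over
the root caches previously had to filter memcg caches out of
slab_caches by hand, and now iterates the dedicated list directly.  A
condensed before/after, taken from the memcg_deactivate_kmem_caches()
hunk in mm/slab_common.c and the alias macros in mm/slab.h below:

	/* Before: every root-cache walk also visits and skips all memcg caches. */
	list_for_each_entry(s, &slab_caches, list) {
		if (!is_root_cache(s))
			continue;
		/* ... operate on the root cache ... */
	}

	/* After: slab_root_caches holds root caches only.  When CONFIG_MEMCG is
	 * disabled, slab_root_caches and root_caches_node are #defined back to
	 * slab_caches and list, so this compiles to exactly the old loop. */
	list_for_each_entry(s, &slab_root_caches, root_caches_node) {
		/* ... operate on the root cache ... */
	}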
diff --git a/include/linux/slab.h b/include/linux/slab.h
index a0cc7a77cda2..af1a5bef80f4 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -556,6 +556,8 @@ struct memcg_cache_array {
  * used to index child cachces during allocation and cleared
  * early during shutdown.
  *
+ * @root_caches_node: List node for slab_root_caches list.
+ *
  * @children: List of all child caches. While the child caches are also
  *            reachable through @memcg_caches, a child cache remains on
  *            this list until it is actually destroyed.
@@ -573,6 +575,7 @@ struct memcg_cache_params {
 	union {
 		struct {
 			struct memcg_cache_array __rcu *memcg_caches;
+			struct list_head __root_caches_node;
 			struct list_head children;
 		};
 		struct {
diff --git a/mm/slab.h b/mm/slab.h
index a08f01016a3f..9631bb27c772 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -201,6 +201,11 @@ void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
 int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
 
 #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+
+/* List of all root caches. */
+extern struct list_head slab_root_caches;
+#define root_caches_node memcg_params.__root_caches_node
+
 /*
  * Iterate over all memcg caches of the given root cache. The caller must hold
  * slab_mutex.
@@ -300,9 +305,14 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order,
 }
 
 extern void slab_init_memcg_params(struct kmem_cache *);
+extern void memcg_link_cache(struct kmem_cache *s);
 
 #else /* CONFIG_MEMCG && !CONFIG_SLOB */
 
+/* If !memcg, all caches are root. */
+#define slab_root_caches slab_caches
+#define root_caches_node list
+
 #define for_each_memcg_cache(iter, root) \
 	for ((void)(iter), (void)(root); 0; )
 
@@ -347,6 +357,11 @@ static inline void memcg_uncharge_slab(struct page *page, int order,
 static inline void slab_init_memcg_params(struct kmem_cache *s)
 {
 }
+
+static inline void memcg_link_cache(struct kmem_cache *s)
+{
+}
+
 #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index c3bbeddaeaaf..274697e1a42a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -138,6 +138,9 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
 }
 
 #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+
+LIST_HEAD(slab_root_caches);
+
 void slab_init_memcg_params(struct kmem_cache *s)
 {
 	s->memcg_params.root_cache = NULL;
@@ -183,9 +186,6 @@ static int update_memcg_params(struct kmem_cache *s, int new_array_size)
 {
 	struct memcg_cache_array *old, *new;
 
-	if (!is_root_cache(s))
-		return 0;
-
 	new = kzalloc(sizeof(struct memcg_cache_array) +
 		      new_array_size * sizeof(void *), GFP_KERNEL);
 	if (!new)
@@ -209,7 +209,7 @@ int memcg_update_all_caches(int num_memcgs)
 	int ret = 0;
 
 	mutex_lock(&slab_mutex);
-	list_for_each_entry(s, &slab_caches, list) {
+	list_for_each_entry(s, &slab_root_caches, root_caches_node) {
 		ret = update_memcg_params(s, num_memcgs);
 		/*
 		 * Instead of freeing the memory, we'll just leave the caches
@@ -222,10 +222,26 @@ int memcg_update_all_caches(int num_memcgs)
 	return ret;
 }
 
-static void unlink_memcg_cache(struct kmem_cache *s)
+void memcg_link_cache(struct kmem_cache *s)
+{
+	if (is_root_cache(s)) {
+		list_add(&s->root_caches_node, &slab_root_caches);
+	} else {
+		list_add(&s->memcg_params.children_node,
+			 &s->memcg_params.root_cache->memcg_params.children);
+		list_add(&s->memcg_params.kmem_caches_node,
+			 &s->memcg_params.memcg->kmem_caches);
+	}
+}
+
+static void memcg_unlink_cache(struct kmem_cache *s)
 {
-	list_del(&s->memcg_params.children_node);
-	list_del(&s->memcg_params.kmem_caches_node);
+	if (is_root_cache(s)) {
+		list_del(&s->root_caches_node);
+	} else {
+		list_del(&s->memcg_params.children_node);
+		list_del(&s->memcg_params.kmem_caches_node);
+	}
 }
 #else
 static inline int init_memcg_params(struct kmem_cache *s,
@@ -238,7 +254,7 @@ static inline void destroy_memcg_params(struct kmem_cache *s)
 {
 }
 
-static inline void unlink_memcg_cache(struct kmem_cache *s)
+static inline void memcg_unlink_cache(struct kmem_cache *s)
 {
 }
 #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
@@ -285,7 +301,7 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
 	if (flags & SLAB_NEVER_MERGE)
 		return NULL;
 
-	list_for_each_entry_reverse(s, &slab_caches, list) {
+	list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
 		if (slab_unmergeable(s))
 			continue;
 
@@ -369,6 +385,7 @@ static struct kmem_cache *create_cache(const char *name,
 
 	s->refcount = 1;
 	list_add(&s->list, &slab_caches);
+	memcg_link_cache(s);
 out:
 	if (err)
 		return ERR_PTR(err);
@@ -514,9 +531,8 @@ static int shutdown_cache(struct kmem_cache *s)
 	if (__kmem_cache_shutdown(s) != 0)
 		return -EBUSY;
 
+	memcg_unlink_cache(s);
 	list_del(&s->list);
-	if (!is_root_cache(s))
-		unlink_memcg_cache(s);
 
 	if (s->flags & SLAB_DESTROY_BY_RCU) {
 		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
@@ -596,10 +612,6 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
 		goto out_unlock;
 	}
 
-	list_add(&s->memcg_params.children_node,
-		 &root_cache->memcg_params.children);
-	list_add(&s->memcg_params.kmem_caches_node, &memcg->kmem_caches);
-
 	/*
 	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
 	 * barrier here to ensure nobody will see the kmem_cache partially
@@ -627,10 +639,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
 	get_online_mems();
 
 	mutex_lock(&slab_mutex);
-	list_for_each_entry(s, &slab_caches, list) {
-		if (!is_root_cache(s))
-			continue;
-
+	list_for_each_entry(s, &slab_root_caches, root_caches_node) {
 		arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
 						lockdep_is_held(&slab_mutex));
 		c = arr->entries[idx];
@@ -829,6 +838,7 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
 
 	create_boot_cache(s, name, size, flags);
 	list_add(&s->list, &slab_caches);
+	memcg_link_cache(s);
 	s->refcount = 1;
 	return s;
 }
@@ -1136,12 +1146,12 @@ static void print_slabinfo_header(struct seq_file *m)
 void *slab_start(struct seq_file *m, loff_t *pos)
 {
 	mutex_lock(&slab_mutex);
-	return seq_list_start(&slab_caches, *pos);
+	return seq_list_start(&slab_root_caches, *pos);
 }
 
 void *slab_next(struct seq_file *m, void *p, loff_t *pos)
 {
-	return seq_list_next(p, &slab_caches, pos);
+	return seq_list_next(p, &slab_root_caches, pos);
 }
 
 void slab_stop(struct seq_file *m, void *p)
@@ -1193,12 +1203,11 @@ static void cache_show(struct kmem_cache *s, struct seq_file *m)
 
 static int slab_show(struct seq_file *m, void *p)
 {
-	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
+	struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node);
 
-	if (p == slab_caches.next)
+	if (p == slab_root_caches.next)
 		print_slabinfo_header(m);
-	if (is_root_cache(s))
-		cache_show(s, m);
+	cache_show(s, m);
 	return 0;
 }
 
diff --git a/mm/slub.c b/mm/slub.c
index caac5456f0ec..03b012bcb5fa 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4127,6 +4127,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
 	}
 	slab_init_memcg_params(s);
 	list_add(&s->list, &slab_caches);
+	memcg_link_cache(s);
 	return s;
 }
 