path: root/mm/list_lru.c
author    Johannes Weiner <hannes@cmpxchg.org>    2017-10-03 19:16:10 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2017-10-03 20:54:25 -0400
commit    f80c7dab95a1f0f968acbafe4426ee9525b6f6ab (patch)
tree      a1b164abe3e6599998869c894eb1da0da0d2064f /mm/list_lru.c
parent    3181c38e4df257852a0c0a53552fd5c869402886 (diff)
mm: memcontrol: use vmalloc fallback for large kmem memcg arrays
For quick per-memcg indexing, slab caches and list_lru structures maintain
linear arrays of descriptors. As the number of concurrent memory cgroups in
the system goes up, this requires large contiguous allocations (8k cgroups =
order-5, 16k cgroups = order-6 etc.) for every existing slab cache and
list_lru, which can easily fail on loaded systems. E.g.:

mkdir: page allocation failure: order:5, mode:0x14040c0(GFP_KERNEL|__GFP_COMP), nodemask=(null)
CPU: 1 PID: 6399 Comm: mkdir Not tainted 4.13.0-mm1-00065-g720bbe532b7c-dirty #481
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-20170228_101828-anatol 04/01/2014
Call Trace:
 ? __alloc_pages_direct_compact+0x4c/0x110
 __alloc_pages_nodemask+0xf50/0x1430
 alloc_pages_current+0x60/0xc0
 kmalloc_order_trace+0x29/0x1b0
 __kmalloc+0x1f4/0x320
 memcg_update_all_list_lrus+0xca/0x2e0
 mem_cgroup_css_alloc+0x612/0x670
 cgroup_apply_control_enable+0x19e/0x360
 cgroup_mkdir+0x322/0x490
 kernfs_iop_mkdir+0x55/0x80
 vfs_mkdir+0xd0/0x120
 SyS_mkdirat+0x6c/0xe0
 SyS_mkdir+0x14/0x20
 entry_SYSCALL_64_fastpath+0x18/0xad
Mem-Info:
 active_anon:2965 inactive_anon:19 isolated_anon:0
 active_file:100270 inactive_file:98846 isolated_file:0
 unevictable:0 dirty:0 writeback:0 unstable:0
 slab_reclaimable:7328 slab_unreclaimable:16402
 mapped:771 shmem:52 pagetables:278 bounce:0
 free:13718 free_pcp:0 free_cma:0

This output is from an artificial reproducer, but we have repeatedly
observed order-7 failures in production in the Facebook fleet. These
systems become useless as they cannot run more jobs, even though there is
plenty of memory to allocate 128 individual pages.

Use kvmalloc and kvzalloc to fall back to vmalloc space if these arrays
prove too large for allocating them physically contiguous.

Link: http://lkml.kernel.org/r/20170918184919.20644-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
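For context, the fallback pattern that kvmalloc()/kvfree() provide can be sketched roughly as below. This is an illustrative, open-coded simplification, not the actual mm/util.c implementation; lru_array_alloc() and lru_array_free() are hypothetical helpers used only for the example.

/*
 * Illustrative sketch of the kmalloc-then-vmalloc fallback that
 * kvmalloc()/kvfree() implement; not the real mm/util.c code.
 */
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

static void *lru_array_alloc(size_t count)	/* hypothetical helper */
{
	void *p;

	/* Try a physically contiguous allocation first, without retrying hard. */
	p = kmalloc_array(count, sizeof(void *),
			  GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
	if (!p)
		/* Fall back to virtually contiguous memory for large arrays. */
		p = vmalloc(count * sizeof(void *));
	return p;
}

static void lru_array_free(void *p)		/* hypothetical helper */
{
	kvfree(p);	/* works for both kmalloc'd and vmalloc'd memory */
}

In the patch itself the callers simply switch kmalloc()/kfree() to kvmalloc()/kvfree(), so this fallback is handled centrally rather than open-coded at every call site.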
Diffstat (limited to 'mm/list_lru.c')
-rw-r--r--  mm/list_lru.c | 12
1 file changed, 6 insertions, 6 deletions
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 7a40fa2be858..f141f0c80ff3 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -325,12 +325,12 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)
 {
 	int size = memcg_nr_cache_ids;
 
-	nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL);
+	nlru->memcg_lrus = kvmalloc(size * sizeof(void *), GFP_KERNEL);
 	if (!nlru->memcg_lrus)
 		return -ENOMEM;
 
 	if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
-		kfree(nlru->memcg_lrus);
+		kvfree(nlru->memcg_lrus);
 		return -ENOMEM;
 	}
 
@@ -340,7 +340,7 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)
 static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
 {
 	__memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
-	kfree(nlru->memcg_lrus);
+	kvfree(nlru->memcg_lrus);
 }
 
 static int memcg_update_list_lru_node(struct list_lru_node *nlru,
@@ -351,12 +351,12 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
 	BUG_ON(old_size > new_size);
 
 	old = nlru->memcg_lrus;
-	new = kmalloc(new_size * sizeof(void *), GFP_KERNEL);
+	new = kvmalloc(new_size * sizeof(void *), GFP_KERNEL);
 	if (!new)
 		return -ENOMEM;
 
 	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
-		kfree(new);
+		kvfree(new);
 		return -ENOMEM;
 	}
 
@@ -373,7 +373,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
 	nlru->memcg_lrus = new;
 	spin_unlock_irq(&nlru->lock);
 
-	kfree(old);
+	kvfree(old);
 	return 0;
 }
 