author		Joonsoo Kim <iamjoonsoo.kim@lge.com>	2014-10-09 18:26:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-10-09 22:25:51 -0400
commit		bf0dea23a9c094ae869a88bb694fbe966671bf6d (patch)
tree		420ca01f321664323b3ad0eeead8f2b4e04cd51e /mm/slab.c
parent		12220dea07f1ac6ac717707104773d771c3f3077 (diff)
mm/slab: use percpu allocator for cpu cache
Because of a chicken-and-egg problem, initialization of SLAB is really complicated. We need to allocate the cpu cache through SLAB to make kmem_cache work, but before kmem_cache is initialized, allocation through SLAB is impossible.

SLUB, on the other hand, initializes in a simpler way: it uses the percpu allocator to allocate the cpu cache, so there is no chicken-and-egg problem.

So, this patch tries to use the percpu allocator in SLAB as well. This simplifies the initialization step in SLAB so that the SLAB code is easier to maintain.

In my testing there is no performance difference.

This implementation relies on the percpu allocator. Because the percpu allocator uses vmalloc address space, vmalloc address space could be exhausted by this change on a many-cpu system with a *32 bit* kernel. This implementation can cover 1024 cpus in the worst case, by the following calculation.

Worst:  1024 cpus * 4 bytes for pointer * 300 kmem_caches *
        120 objects per cpu_cache = 140 MB
Normal: 1024 cpus * 4 bytes for pointer * 150 kmem_caches (slab merge) *
        80 objects per cpu_cache = 46 MB

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
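The estimate above can be reproduced with a small standalone program. This is only a userspace sketch of the arithmetic, not part of the patch; the cpu, pointer, cache and object counts are the assumptions quoted in the message rather than values measured on a real system, and the per-cpu struct array_cache header that the patch also allocates is ignored here just as it is in the back-of-envelope numbers above.

/* Userspace check of the vmalloc-footprint estimate quoted in the message. */
#include <stdio.h>

/* One pointer slot per cached object, per kmem_cache, per cpu
 * (4-byte pointers on a 32-bit kernel). */
static long long footprint(long long cpus, long long ptr_bytes,
			   long long caches, long long objs_per_cpu_cache)
{
	return cpus * ptr_bytes * caches * objs_per_cpu_cache;
}

int main(void)
{
	long long worst  = footprint(1024, 4, 300, 120);
	long long normal = footprint(1024, 4, 150, 80);

	printf("worst : %lld bytes = %.1f MB\n", worst,  worst  / (1024.0 * 1024.0));
	printf("normal: %lld bytes = %.1f MB\n", normal, normal / (1024.0 * 1024.0));
	return 0;
}

This prints roughly 140.6 MB and 46.9 MB, matching the 140 MB / 46 MB figures above.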
Diffstat (limited to 'mm/slab.c')
-rw-r--r--	mm/slab.c	239
1 file changed, 75 insertions(+), 164 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 328233a724af..655d65c3f010 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -237,11 +237,10 @@ struct arraycache_init {
 /*
  * Need this for bootstrapping a per node allocator.
  */
-#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
+#define NUM_INIT_LISTS (2 * MAX_NUMNODES)
 static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
 #define CACHE_CACHE 0
-#define SIZE_AC MAX_NUMNODES
-#define SIZE_NODE (2 * MAX_NUMNODES)
+#define SIZE_NODE (MAX_NUMNODES)
 
 static int drain_freelist(struct kmem_cache *cache,
 			struct kmem_cache_node *n, int tofree);
@@ -253,7 +252,6 @@ static void cache_reap(struct work_struct *unused);
 
 static int slab_early_init = 1;
 
-#define INDEX_AC kmalloc_index(sizeof(struct arraycache_init))
 #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
 
 static void kmem_cache_node_init(struct kmem_cache_node *parent)
@@ -458,9 +456,6 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
 	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
 }
 
-static struct arraycache_init initarray_generic =
-    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
-
 /* internal cache of cache description objs */
 static struct kmem_cache kmem_cache_boot = {
 	.batchcount = 1,
@@ -476,7 +471,7 @@ static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
-	return cachep->array[smp_processor_id()];
+	return this_cpu_ptr(cachep->cpu_cache);
 }
 
 static size_t calculate_freelist_size(int nr_objs, size_t align)
@@ -1096,24 +1091,25 @@ static void cpuup_canceled(long cpu)
 		struct alien_cache **alien;
 		LIST_HEAD(list);
 
-		/* cpu is dead; no one can alloc from it. */
-		nc = cachep->array[cpu];
-		cachep->array[cpu] = NULL;
 		n = get_node(cachep, node);
-
 		if (!n)
-			goto free_array_cache;
+			continue;
 
 		spin_lock_irq(&n->list_lock);
 
 		/* Free limit for this kmem_cache_node */
 		n->free_limit -= cachep->batchcount;
-		if (nc)
+
+		/* cpu is dead; no one can alloc from it. */
+		nc = per_cpu_ptr(cachep->cpu_cache, cpu);
+		if (nc) {
 			free_block(cachep, nc->entry, nc->avail, node, &list);
+			nc->avail = 0;
+		}
 
 		if (!cpumask_empty(mask)) {
 			spin_unlock_irq(&n->list_lock);
-			goto free_array_cache;
+			goto free_slab;
 		}
 
 		shared = n->shared;
@@ -1133,9 +1129,9 @@ static void cpuup_canceled(long cpu)
 			drain_alien_cache(cachep, alien);
 			free_alien_cache(alien);
 		}
-free_array_cache:
+
+free_slab:
 		slabs_destroy(cachep, &list);
-		kfree(nc);
 	}
 	/*
 	 * In the previous loop, all the objects were freed to
@@ -1172,32 +1168,23 @@ static int cpuup_prepare(long cpu)
 	 * array caches
 	 */
 	list_for_each_entry(cachep, &slab_caches, list) {
-		struct array_cache *nc;
 		struct array_cache *shared = NULL;
 		struct alien_cache **alien = NULL;
 
-		nc = alloc_arraycache(node, cachep->limit,
-					cachep->batchcount, GFP_KERNEL);
-		if (!nc)
-			goto bad;
 		if (cachep->shared) {
 			shared = alloc_arraycache(node,
 				cachep->shared * cachep->batchcount,
 				0xbaadf00d, GFP_KERNEL);
-			if (!shared) {
-				kfree(nc);
+			if (!shared)
 				goto bad;
-			}
 		}
 		if (use_alien_caches) {
 			alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
 			if (!alien) {
 				kfree(shared);
-				kfree(nc);
 				goto bad;
 			}
 		}
-		cachep->array[cpu] = nc;
 		n = get_node(cachep, node);
 		BUG_ON(!n);
 
@@ -1389,15 +1376,6 @@ static void __init set_up_node(struct kmem_cache *cachep, int index)
 }
 
 /*
- * The memory after the last cpu cache pointer is used for the
- * the node pointer.
- */
-static void setup_node_pointer(struct kmem_cache *cachep)
-{
-	cachep->node = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids];
-}
-
-/*
  * Initialisation.  Called after the page allocator have been initialised and
  * before smp_init().
  */
@@ -1408,7 +1386,6 @@ void __init kmem_cache_init(void)
 	BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
 				 sizeof(struct rcu_head));
 	kmem_cache = &kmem_cache_boot;
-	setup_node_pointer(kmem_cache);
 
 	if (num_possible_nodes() == 1)
 		use_alien_caches = 0;
@@ -1416,8 +1393,6 @@ void __init kmem_cache_init(void)
 	for (i = 0; i < NUM_INIT_LISTS; i++)
 		kmem_cache_node_init(&init_kmem_cache_node[i]);
 
-	set_up_node(kmem_cache, CACHE_CACHE);
-
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
 	 * page orders on machines with more than 32MB of memory if
@@ -1452,49 +1427,22 @@ void __init kmem_cache_init(void)
 	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
 	 */
 	create_boot_cache(kmem_cache, "kmem_cache",
-		offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+		offsetof(struct kmem_cache, node) +
 				  nr_node_ids * sizeof(struct kmem_cache_node *),
 				  SLAB_HWCACHE_ALIGN);
 	list_add(&kmem_cache->list, &slab_caches);
-
-	/* 2+3) create the kmalloc caches */
+	slab_state = PARTIAL;
 
 	/*
-	 * Initialize the caches that provide memory for the array cache and the
-	 * kmem_cache_node structures first.  Without this, further allocations will
-	 * bug.
+	 * Initialize the caches that provide memory for the kmem_cache_node
+	 * structures first.  Without this, further allocations will bug.
 	 */
-
-	kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac",
-					kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS);
-
-	if (INDEX_AC != INDEX_NODE)
-		kmalloc_caches[INDEX_NODE] =
-			create_kmalloc_cache("kmalloc-node",
+	kmalloc_caches[INDEX_NODE] = create_kmalloc_cache("kmalloc-node",
 				kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
+	slab_state = PARTIAL_NODE;
 
 	slab_early_init = 0;
 
-	/* 4) Replace the bootstrap head arrays */
-	{
-		struct array_cache *ptr;
-
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
-
-		memcpy(ptr, cpu_cache_get(kmem_cache),
-		       sizeof(struct arraycache_init));
-
-		kmem_cache->array[smp_processor_id()] = ptr;
-
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
-
-		BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC])
-		       != &initarray_generic.cache);
-		memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
-		       sizeof(struct arraycache_init));
-
-		kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
-	}
 	/* 5) Replace the bootstrap kmem_cache_node */
 	{
 		int nid;
@@ -1502,13 +1450,8 @@ void __init kmem_cache_init(void)
 		for_each_online_node(nid) {
 			init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
 
-			init_list(kmalloc_caches[INDEX_AC],
-				  &init_kmem_cache_node[SIZE_AC + nid], nid);
-
-			if (INDEX_AC != INDEX_NODE) {
-				init_list(kmalloc_caches[INDEX_NODE],
+			init_list(kmalloc_caches[INDEX_NODE],
 				  &init_kmem_cache_node[SIZE_NODE + nid], nid);
-			}
 		}
 	}
 
@@ -2041,56 +1984,53 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 	return left_over;
 }
 
+static struct array_cache __percpu *alloc_kmem_cache_cpus(
+		struct kmem_cache *cachep, int entries, int batchcount)
+{
+	int cpu;
+	size_t size;
+	struct array_cache __percpu *cpu_cache;
+
+	size = sizeof(void *) * entries + sizeof(struct array_cache);
+	cpu_cache = __alloc_percpu(size, 0);
+
+	if (!cpu_cache)
+		return NULL;
+
+	for_each_possible_cpu(cpu) {
+		init_arraycache(per_cpu_ptr(cpu_cache, cpu),
+				entries, batchcount);
+	}
+
+	return cpu_cache;
+}
+
 static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	if (slab_state >= FULL)
 		return enable_cpucache(cachep, gfp);
 
+	cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1);
+	if (!cachep->cpu_cache)
+		return 1;
+
 	if (slab_state == DOWN) {
-		/*
-		 * Note: Creation of first cache (kmem_cache).
-		 * The setup_node is taken care
-		 * of by the caller of __kmem_cache_create
-		 */
-		cachep->array[smp_processor_id()] = &initarray_generic.cache;
-		slab_state = PARTIAL;
+		/* Creation of first cache (kmem_cache). */
+		set_up_node(kmem_cache, CACHE_CACHE);
 	} else if (slab_state == PARTIAL) {
-		/*
-		 * Note: the second kmem_cache_create must create the cache
-		 * that's used by kmalloc(24), otherwise the creation of
-		 * further caches will BUG().
-		 */
-		cachep->array[smp_processor_id()] = &initarray_generic.cache;
-
-		/*
-		 * If the cache that's used by kmalloc(sizeof(kmem_cache_node)) is
-		 * the second cache, then we need to set up all its node/,
-		 * otherwise the creation of further caches will BUG().
-		 */
-		set_up_node(cachep, SIZE_AC);
-		if (INDEX_AC == INDEX_NODE)
-			slab_state = PARTIAL_NODE;
-		else
-			slab_state = PARTIAL_ARRAYCACHE;
+		/* For kmem_cache_node */
+		set_up_node(cachep, SIZE_NODE);
 	} else {
-		/* Remaining boot caches */
-		cachep->array[smp_processor_id()] =
-			kmalloc(sizeof(struct arraycache_init), gfp);
+		int node;
 
-		if (slab_state == PARTIAL_ARRAYCACHE) {
-			set_up_node(cachep, SIZE_NODE);
-			slab_state = PARTIAL_NODE;
-		} else {
-			int node;
-			for_each_online_node(node) {
-				cachep->node[node] =
-					kmalloc_node(sizeof(struct kmem_cache_node),
-						gfp, node);
-				BUG_ON(!cachep->node[node]);
-				kmem_cache_node_init(cachep->node[node]);
-			}
+		for_each_online_node(node) {
+			cachep->node[node] = kmalloc_node(
+				sizeof(struct kmem_cache_node), gfp, node);
+			BUG_ON(!cachep->node[node]);
+			kmem_cache_node_init(cachep->node[node]);
 		}
 	}
+
 	cachep->node[numa_mem_id()]->next_reap =
 			jiffies + REAPTIMEOUT_NODE +
 			((unsigned long)cachep) % REAPTIMEOUT_NODE;
@@ -2213,7 +2153,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	else
 		gfp = GFP_NOWAIT;
 
-	setup_node_pointer(cachep);
 #if DEBUG
 
 	/*
@@ -2470,8 +2409,7 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
 	if (rc)
 		return rc;
 
-	for_each_online_cpu(i)
-	    kfree(cachep->array[i]);
+	free_percpu(cachep->cpu_cache);
 
 	/* NUMA: free the node structures */
 	for_each_kmem_cache_node(cachep, i, n) {
@@ -3719,72 +3657,45 @@ fail:
 	return -ENOMEM;
 }
 
-struct ccupdate_struct {
-	struct kmem_cache *cachep;
-	struct array_cache *new[0];
-};
-
-static void do_ccupdate_local(void *info)
-{
-	struct ccupdate_struct *new = info;
-	struct array_cache *old;
-
-	check_irq_off();
-	old = cpu_cache_get(new->cachep);
-
-	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
-	new->new[smp_processor_id()] = old;
-}
-
 /* Always called with the slab_mutex held */
 static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
 				int batchcount, int shared, gfp_t gfp)
 {
-	struct ccupdate_struct *new;
-	int i;
+	struct array_cache __percpu *cpu_cache, *prev;
+	int cpu;
 
-	new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
-		      gfp);
-	if (!new)
+	cpu_cache = alloc_kmem_cache_cpus(cachep, limit, batchcount);
+	if (!cpu_cache)
 		return -ENOMEM;
 
-	for_each_online_cpu(i) {
-		new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
-						batchcount, gfp);
-		if (!new->new[i]) {
-			for (i--; i >= 0; i--)
-				kfree(new->new[i]);
-			kfree(new);
-			return -ENOMEM;
-		}
-	}
-	new->cachep = cachep;
-
-	on_each_cpu(do_ccupdate_local, (void *)new, 1);
+	prev = cachep->cpu_cache;
+	cachep->cpu_cache = cpu_cache;
+	kick_all_cpus_sync();
 
 	check_irq_on();
 	cachep->batchcount = batchcount;
 	cachep->limit = limit;
 	cachep->shared = shared;
 
-	for_each_online_cpu(i) {
+	if (!prev)
+		goto alloc_node;
+
+	for_each_online_cpu(cpu) {
 		LIST_HEAD(list);
-		struct array_cache *ccold = new->new[i];
 		int node;
 		struct kmem_cache_node *n;
+		struct array_cache *ac = per_cpu_ptr(prev, cpu);
 
-		if (!ccold)
-			continue;
-
-		node = cpu_to_mem(i);
+		node = cpu_to_mem(cpu);
 		n = get_node(cachep, node);
 		spin_lock_irq(&n->list_lock);
-		free_block(cachep, ccold->entry, ccold->avail, node, &list);
+		free_block(cachep, ac->entry, ac->avail, node, &list);
 		spin_unlock_irq(&n->list_lock);
 		slabs_destroy(cachep, &list);
-		kfree(ccold);
 	}
-	kfree(new);
+	free_percpu(prev);
+
+alloc_node:
 	return alloc_kmem_cache_node(cachep, gfp);
 }
 