author		Christoph Lameter <cl@linux-foundation.org>	2009-12-18 17:26:20 -0500
committer	Pekka Enberg <penberg@cs.helsinki.fi>		2009-12-20 02:29:18 -0500
commit		9dfc6e68bfe6ee452efb1a4e9ca26a9007f2b864 (patch)
tree		40e54f2819e176ceb95b8899265bd48751965c27
parent		55639353a0035052d9ea6cfe4dde0ac7fcbb2c9f (diff)
SLUB: Use this_cpu operations in slub
Using per cpu allocations removes the need for the per cpu arrays in the
kmem_cache struct. These could get quite big if we have to support systems
with thousands of cpus.

The use of this_cpu_xx operations results in:

1. The size of kmem_cache for SMP configurations shrinks since we only need
   one pointer instead of NR_CPUS. The same pointer can be used by all
   processors, which reduces the cache footprint of the allocator.

2. We can dynamically size kmem_cache according to the actual nodes in the
   system, meaning less memory overhead for configurations that may
   potentially support up to 1k NUMA nodes / 4k cpus.

3. We no longer need to allocate and release kmem_cache_cpu structures by
   hand when bringing up and shutting down cpus; the cpu alloc logic does it
   all for us. This removes some portions of the cpu hotplug functionality.

4. Fastpath performance increases since per cpu pointer lookups and address
   calculations are avoided.

V7-V8
- Convert missed get_cpu_slab() under CONFIG_SLUB_STATS

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
-rw-r--r--	include/linux/slub_def.h |   6
-rw-r--r--	mm/slub.c                | 202
2 files changed, 49 insertions, 159 deletions
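The sketch below is an editorial illustration of the percpu pattern this patch adopts; it is not part of the patch, and the names demo_cache and demo_cpu_data are invented for the example. A single __percpu pointer obtained from alloc_percpu() replaces an NR_CPUS-sized array of kmem_cache_cpu pointers, per_cpu_ptr() replaces indexing that array by cpu number, and this_cpu_ptr() replaces the smp_processor_id() lookup on the fast path.

/* Editorial sketch only -- not part of the patch below. */
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/errno.h>

struct demo_cpu_data {
	void **freelist;		/* plays the role of kmem_cache_cpu */
	unsigned int objsize;
};

struct demo_cache {
	/* one pointer shared by all cpus, instead of *cpu[NR_CPUS] */
	struct demo_cpu_data __percpu *cpu_data;
};

static int demo_cache_init(struct demo_cache *s)
{
	int cpu;

	/* backing storage exists only for possible cpus */
	s->cpu_data = alloc_percpu(struct demo_cpu_data);
	if (!s->cpu_data)
		return -ENOMEM;

	for_each_possible_cpu(cpu)
		per_cpu_ptr(s->cpu_data, cpu)->freelist = NULL;
	return 0;
}

/* caller runs with preemption/interrupts disabled, like slab_alloc() */
static void *demo_cache_fastpath(struct demo_cache *s)
{
	/*
	 * this_cpu_ptr() folds the per cpu offset into the address
	 * calculation; no smp_processor_id() and no NR_CPUS array walk.
	 */
	struct demo_cpu_data *c = this_cpu_ptr(s->cpu_data);

	return c->freelist ? *c->freelist : NULL;
}

static void demo_cache_destroy(struct demo_cache *s)
{
	free_percpu(s->cpu_data);	/* replaces the hand-rolled cleanup */
}

This is also why the patch keeps a static kmalloc_percpu[] bootstrap array: alloc_percpu() itself is not yet available while the earliest kmalloc caches are created.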
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 1e14beb23f9b..17ebe0f89bf3 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -69,6 +69,7 @@ struct kmem_cache_order_objects {
  * Slab cache management.
  */
 struct kmem_cache {
+	struct kmem_cache_cpu *cpu_slab;
 	/* Used for retriving partial slabs etc */
 	unsigned long flags;
 	int size;		/* The size of an object including meta data */
@@ -104,11 +105,6 @@ struct kmem_cache {
 	int remote_node_defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 #endif
-#ifdef CONFIG_SMP
-	struct kmem_cache_cpu *cpu_slab[NR_CPUS];
-#else
-	struct kmem_cache_cpu cpu_slab;
-#endif
 };
 
 /*
diff --git a/mm/slub.c b/mm/slub.c
index 8d71aaf888d7..d6c9ecf629d5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -242,15 +242,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
 #endif
 }
 
-static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
-{
-#ifdef CONFIG_SMP
-	return s->cpu_slab[cpu];
-#else
-	return &s->cpu_slab;
-#endif
-}
-
 /* Verify that a pointer has an address that is valid within a slab page */
 static inline int check_valid_pointer(struct kmem_cache *s,
 				struct page *page, const void *object)
@@ -1124,7 +1115,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 		if (!page)
 			return NULL;
 
-		stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK);
+		stat(this_cpu_ptr(s->cpu_slab), ORDER_FALLBACK);
 	}
 
 	if (kmemcheck_enabled
@@ -1422,7 +1413,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-	struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
+	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
 
 	__ClearPageSlubFrozen(page);
 	if (page->inuse) {
@@ -1454,7 +1445,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 		slab_unlock(page);
 	} else {
 		slab_unlock(page);
-		stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
+		stat(__this_cpu_ptr(s->cpu_slab), FREE_SLAB);
 		discard_slab(s, page);
 	}
 }
@@ -1507,7 +1498,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
  */
 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 {
-	struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
 	if (likely(c && c->page))
 		flush_slab(s, c);
@@ -1673,7 +1664,7 @@ new_slab:
 	local_irq_disable();
 
 	if (new) {
-		c = get_cpu_slab(s, smp_processor_id());
+		c = __this_cpu_ptr(s->cpu_slab);
 		stat(c, ALLOC_SLAB);
 		if (c->page)
 			flush_slab(s, c);
@@ -1711,7 +1702,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	void **object;
 	struct kmem_cache_cpu *c;
 	unsigned long flags;
-	unsigned int objsize;
+	unsigned long objsize;
 
 	gfpflags &= gfp_allowed_mask;
 
@@ -1722,14 +1713,14 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 		return NULL;
 
 	local_irq_save(flags);
-	c = get_cpu_slab(s, smp_processor_id());
+	c = __this_cpu_ptr(s->cpu_slab);
+	object = c->freelist;
 	objsize = c->objsize;
-	if (unlikely(!c->freelist || !node_match(c, node)))
+	if (unlikely(!object || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
-		object = c->freelist;
 		c->freelist = object[c->offset];
 		stat(c, ALLOC_FASTPATH);
 	}
@@ -1800,7 +1791,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
 
-	c = get_cpu_slab(s, raw_smp_processor_id());
+	c = __this_cpu_ptr(s->cpu_slab);
 	stat(c, FREE_SLOWPATH);
 	slab_lock(page);
 
@@ -1872,7 +1863,7 @@ static __always_inline void slab_free(struct kmem_cache *s,
 
 	kmemleak_free_recursive(x, s->flags);
 	local_irq_save(flags);
-	c = get_cpu_slab(s, smp_processor_id());
+	c = __this_cpu_ptr(s->cpu_slab);
 	kmemcheck_slab_free(s, object, c->objsize);
 	debug_check_no_locks_freed(object, c->objsize);
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
@@ -2095,130 +2086,28 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
 #endif
 }
 
-#ifdef CONFIG_SMP
-/*
- * Per cpu array for per cpu structures.
- *
- * The per cpu array places all kmem_cache_cpu structures from one processor
- * close together meaning that it becomes possible that multiple per cpu
- * structures are contained in one cacheline. This may be particularly
- * beneficial for the kmalloc caches.
- *
- * A desktop system typically has around 60-80 slabs. With 100 here we are
- * likely able to get per cpu structures for all caches from the array defined
- * here. We must be able to cover all kmalloc caches during bootstrap.
- *
- * If the per cpu array is exhausted then fall back to kmalloc
- * of individual cachelines. No sharing is possible then.
- */
-#define NR_KMEM_CACHE_CPU 100
-
-static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
-			kmem_cache_cpu);
-
-static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
-static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
-
-static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
-							int cpu, gfp_t flags)
-{
-	struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu);
-
-	if (c)
-		per_cpu(kmem_cache_cpu_free, cpu) =
-				(void *)c->freelist;
-	else {
-		/* Table overflow: So allocate ourselves */
-		c = kmalloc_node(
-			ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()),
-			flags, cpu_to_node(cpu));
-		if (!c)
-			return NULL;
-	}
-
-	init_kmem_cache_cpu(s, c);
-	return c;
-}
-
-static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
-{
-	if (c < per_cpu(kmem_cache_cpu, cpu) ||
-			c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
-		kfree(c);
-		return;
-	}
-	c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu);
-	per_cpu(kmem_cache_cpu_free, cpu) = c;
-}
-
-static void free_kmem_cache_cpus(struct kmem_cache *s)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
-
-		if (c) {
-			s->cpu_slab[cpu] = NULL;
-			free_kmem_cache_cpu(c, cpu);
-		}
-	}
-}
-
-static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[SLUB_PAGE_SHIFT]);
 
-		if (c)
-			continue;
-
-		c = alloc_kmem_cache_cpu(s, cpu, flags);
-		if (!c) {
-			free_kmem_cache_cpus(s);
-			return 0;
-		}
-		s->cpu_slab[cpu] = c;
-	}
-	return 1;
-}
-
-/*
- * Initialize the per cpu array.
- */
-static void init_alloc_cpu_cpu(int cpu)
-{
-	int i;
-
-	if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)))
-		return;
-
-	for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
-		free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
-
-	cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once));
-}
-
-static void __init init_alloc_cpu(void)
+static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
 {
 	int cpu;
 
-	for_each_online_cpu(cpu)
-		init_alloc_cpu_cpu(cpu);
-}
+	if (s < kmalloc_caches + SLUB_PAGE_SHIFT && s >= kmalloc_caches)
+		/*
+		 * Boot time creation of the kmalloc array. Use static per cpu data
+		 * since the per cpu allocator is not available yet.
+		 */
+		s->cpu_slab = per_cpu_var(kmalloc_percpu) + (s - kmalloc_caches);
+	else
+		s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
 
-#else
-static inline void free_kmem_cache_cpus(struct kmem_cache *s) {}
-static inline void init_alloc_cpu(void) {}
+	if (!s->cpu_slab)
+		return 0;
 
-static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
-{
-	init_kmem_cache_cpu(s, &s->cpu_slab);
+	for_each_possible_cpu(cpu)
+		init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu));
 	return 1;
 }
-#endif
 
 #ifdef CONFIG_NUMA
 /*
@@ -2609,9 +2498,8 @@ static inline int kmem_cache_close(struct kmem_cache *s)
 	int node;
 
 	flush_all(s);
-
+	free_percpu(s->cpu_slab);
 	/* Attempt to free all objects */
-	free_kmem_cache_cpus(s);
 	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n = get_node(s, node);
 
@@ -2760,7 +2648,19 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 	realsize = kmalloc_caches[index].objsize;
 	text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
 			 (unsigned int)realsize);
-	s = kmalloc(kmem_size, flags & ~SLUB_DMA);
+
+	if (flags & __GFP_WAIT)
+		s = kmalloc(kmem_size, flags & ~SLUB_DMA);
+	else {
+		int i;
+
+		s = NULL;
+		for (i = 0; i < SLUB_PAGE_SHIFT; i++)
+			if (kmalloc_caches[i].size) {
+				s = kmalloc_caches + i;
+				break;
+			}
+	}
 
 	/*
 	 * Must defer sysfs creation to a workqueue because we don't know
@@ -3176,8 +3076,6 @@ void __init kmem_cache_init(void)
 	int i;
 	int caches = 0;
 
-	init_alloc_cpu();
-
 #ifdef CONFIG_NUMA
 	/*
 	 * Must first have the slab cache available for the allocations of the
@@ -3261,8 +3159,10 @@ void __init kmem_cache_init(void)
 
 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
-	kmem_size = offsetof(struct kmem_cache, cpu_slab) +
-				nr_cpu_ids * sizeof(struct kmem_cache_cpu *);
+#endif
+#ifdef CONFIG_NUMA
+	kmem_size = offsetof(struct kmem_cache, node) +
+				nr_node_ids * sizeof(struct kmem_cache_node *);
 #else
 	kmem_size = sizeof(struct kmem_cache);
 #endif
@@ -3365,7 +3265,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 	 * per cpu structures
 	 */
 	for_each_online_cpu(cpu)
-		get_cpu_slab(s, cpu)->objsize = s->objsize;
+		per_cpu_ptr(s->cpu_slab, cpu)->objsize = s->objsize;
 
 	s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
 	up_write(&slub_lock);
@@ -3422,11 +3322,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		init_alloc_cpu_cpu(cpu);
 		down_read(&slub_lock);
 		list_for_each_entry(s, &slab_caches, list)
-			s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu,
-							GFP_KERNEL);
+			init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu));
 		up_read(&slub_lock);
 		break;
 
@@ -3436,13 +3334,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
 	case CPU_DEAD_FROZEN:
 		down_read(&slub_lock);
 		list_for_each_entry(s, &slab_caches, list) {
-			struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
-
 			local_irq_save(flags);
 			__flush_cpu_slab(s, cpu);
 			local_irq_restore(flags);
-			free_kmem_cache_cpu(c, cpu);
-			s->cpu_slab[cpu] = NULL;
 		}
 		up_read(&slub_lock);
 		break;
@@ -3928,7 +3822,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 		int cpu;
 
 		for_each_possible_cpu(cpu) {
-			struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
 			if (!c || c->node < 0)
 				continue;
@@ -4353,7 +4247,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
 		return -ENOMEM;
 
 	for_each_online_cpu(cpu) {
-		unsigned x = get_cpu_slab(s, cpu)->stat[si];
+		unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
 
 		data[cpu] = x;
 		sum += x;
@@ -4376,7 +4270,7 @@ static void clear_stat(struct kmem_cache *s, enum stat_item si)
 	int cpu;
 
 	for_each_online_cpu(cpu)
-		get_cpu_slab(s, cpu)->stat[si] = 0;
+		per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
 }
 
 #define STAT_ATTR(si, text) \