author     Christoph Lameter <cl@linux-foundation.org>    2009-12-18 17:26:22 -0500
committer  Pekka Enberg <penberg@cs.helsinki.fi>          2009-12-20 03:17:59 -0500
commit     ff12059ed14b0773d7bbef86f98218ada6c20770 (patch)
tree       33b63b4f746e984e8421ea3624312bc98ccb2f37
parent     756dee75872a2a764b478e18076360b8a4ec9045 (diff)
SLUB: this_cpu: Remove slub kmem_cache fields
Remove the fields in struct kmem_cache_cpu that were used to cache data from struct kmem_cache when the two lived in different cachelines. The cacheline that holds the per cpu array pointer now also holds these values, so the size of struct kmem_cache_cpu can be cut almost in half.

The get_freepointer() and set_freepointer() functions, which used to be intended only for the slow path, are now also useful on the hot path since access to the size field no longer requires touching an additional cacheline. This results in consistent use of these functions for setting the freepointer of objects throughout SLUB.

We also initialize all possible kmem_cache_cpu structures when a slab cache is created, so there is no need to initialize them when a processor or node comes online.

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
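For reference, here is a minimal userspace sketch of the freelist scheme the patch standardizes on. It is not the kernel code itself; struct fake_kmem_cache, its field names and main() are illustrative assumptions. The point it shows: a free object stores the address of the next free object at byte offset s->offset inside itself, so get_freepointer()/set_freepointer() only need the offset read from the cache structure and no per cpu copy of it.

/*
 * Userspace sketch of SLUB-style free pointers (illustrative, not kernel code).
 * Each free object holds the next free object's address at byte `offset`
 * inside itself; offset 0 mirrors the common case where the free pointer
 * overlays the start of an unused object.
 */
#include <stddef.h>
#include <stdio.h>

struct fake_kmem_cache {
	size_t offset;		/* where the free pointer lives inside a free object */
	size_t objsize;		/* object size, read directly from the cache */
};

static inline void *get_freepointer(struct fake_kmem_cache *s, void *object)
{
	/* reinterpret the bytes at object + offset as the next-free pointer */
	return *(void **)((char *)object + s->offset);
}

static inline void set_freepointer(struct fake_kmem_cache *s, void *object, void *fp)
{
	*(void **)((char *)object + s->offset) = fp;
}

int main(void)
{
	struct fake_kmem_cache s = { .offset = 0, .objsize = 32 };
	char a[32], b[32];

	/* build a two-entry freelist: a -> b -> NULL */
	set_freepointer(&s, b, NULL);
	set_freepointer(&s, a, b);

	/* "allocate" the head and advance the list, as the fast path does */
	void *freelist = a;
	void *object = freelist;
	freelist = get_freepointer(&s, object);

	printf("allocated %p, next free %p\n", object, freelist);
	return 0;
}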
-rw-r--r--  include/linux/slub_def.h |  2
-rw-r--r--  mm/slub.c                | 76
2 files changed, 17 insertions(+), 61 deletions(-)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index a78fb4ac2015..0249d4175bac 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -38,8 +38,6 @@ struct kmem_cache_cpu {
 	void **freelist;	/* Pointer to first free per cpu object */
 	struct page *page;	/* The slab from which we are allocating */
 	int node;		/* The node of the page (or -1 for debug) */
-	unsigned int offset;	/* Freepointer offset (in word units) */
-	unsigned int objsize;	/* Size of an object (from kmem_cache) */
 #ifdef CONFIG_SLUB_STATS
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
diff --git a/mm/slub.c b/mm/slub.c
index cdb7f0214af0..30d2dde27563 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -260,13 +260,6 @@ static inline int check_valid_pointer(struct kmem_cache *s,
 	return 1;
 }
 
-/*
- * Slow version of get and set free pointer.
- *
- * This version requires touching the cache lines of kmem_cache which
- * we avoid to do in the fast alloc free paths. There we obtain the offset
- * from the page struct.
- */
 static inline void *get_freepointer(struct kmem_cache *s, void *object)
 {
 	return *(void **)(object + s->offset);
@@ -1473,10 +1466,10 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 
 		/* Retrieve object from cpu_freelist */
 		object = c->freelist;
-		c->freelist = c->freelist[c->offset];
+		c->freelist = get_freepointer(s, c->freelist);
 
 		/* And put onto the regular freelist */
-		object[c->offset] = page->freelist;
+		set_freepointer(s, object, page->freelist);
 		page->freelist = object;
 		page->inuse--;
 	}
@@ -1635,7 +1628,7 @@ load_freelist:
 	if (unlikely(SLABDEBUG && PageSlubDebug(c->page)))
 		goto debug;
 
-	c->freelist = object[c->offset];
+	c->freelist = get_freepointer(s, object);
 	c->page->inuse = c->page->objects;
 	c->page->freelist = NULL;
 	c->node = page_to_nid(c->page);
@@ -1681,7 +1674,7 @@ debug:
 		goto another_slab;
 
 	c->page->inuse++;
-	c->page->freelist = object[c->offset];
+	c->page->freelist = get_freepointer(s, object);
 	c->node = -1;
 	goto unlock_out;
 }
@@ -1702,7 +1695,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	void **object;
 	struct kmem_cache_cpu *c;
 	unsigned long flags;
-	unsigned long objsize;
 
 	gfpflags &= gfp_allowed_mask;
 
@@ -1715,22 +1707,21 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	local_irq_save(flags);
 	c = __this_cpu_ptr(s->cpu_slab);
 	object = c->freelist;
-	objsize = c->objsize;
 	if (unlikely(!object || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
-		c->freelist = object[c->offset];
+		c->freelist = get_freepointer(s, object);
 		stat(c, ALLOC_FASTPATH);
 	}
 	local_irq_restore(flags);
 
 	if (unlikely(gfpflags & __GFP_ZERO) && object)
-		memset(object, 0, objsize);
+		memset(object, 0, s->objsize);
 
-	kmemcheck_slab_alloc(s, gfpflags, object, c->objsize);
-	kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags);
+	kmemcheck_slab_alloc(s, gfpflags, object, s->objsize);
+	kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags);
 
 	return object;
 }
@@ -1785,7 +1776,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
  * handling required then we can return immediately.
  */
 static void __slab_free(struct kmem_cache *s, struct page *page,
-			void *x, unsigned long addr, unsigned int offset)
+			void *x, unsigned long addr)
 {
 	void *prior;
 	void **object = (void *)x;
@@ -1799,7 +1790,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		goto debug;
 
 checks_ok:
-	prior = object[offset] = page->freelist;
+	prior = page->freelist;
+	set_freepointer(s, object, prior);
 	page->freelist = object;
 	page->inuse--;
 
@@ -1864,16 +1856,16 @@ static __always_inline void slab_free(struct kmem_cache *s,
 	kmemleak_free_recursive(x, s->flags);
 	local_irq_save(flags);
 	c = __this_cpu_ptr(s->cpu_slab);
-	kmemcheck_slab_free(s, object, c->objsize);
-	debug_check_no_locks_freed(object, c->objsize);
+	kmemcheck_slab_free(s, object, s->objsize);
+	debug_check_no_locks_freed(object, s->objsize);
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
-		debug_check_no_obj_freed(object, c->objsize);
+		debug_check_no_obj_freed(object, s->objsize);
 	if (likely(page == c->page && c->node >= 0)) {
-		object[c->offset] = c->freelist;
+		set_freepointer(s, object, c->freelist);
 		c->freelist = object;
 		stat(c, FREE_FASTPATH);
 	} else
-		__slab_free(s, page, x, addr, c->offset);
+		__slab_free(s, page, x, addr);
 
 	local_irq_restore(flags);
 }
@@ -2060,19 +2052,6 @@ static unsigned long calculate_alignment(unsigned long flags,
 	return ALIGN(align, sizeof(void *));
 }
 
-static void init_kmem_cache_cpu(struct kmem_cache *s,
-			struct kmem_cache_cpu *c)
-{
-	c->page = NULL;
-	c->freelist = NULL;
-	c->node = 0;
-	c->offset = s->offset / sizeof(void *);
-	c->objsize = s->objsize;
-#ifdef CONFIG_SLUB_STATS
-	memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned));
-#endif
-}
-
 static void
 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
 {
@@ -2090,8 +2069,6 @@ static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[SLUB_PAGE_SHIFT]);
 
 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
 {
-	int cpu;
-
 	if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
 		/*
 		 * Boot time creation of the kmalloc array. Use static per cpu data
@@ -2104,8 +2081,6 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
 	if (!s->cpu_slab)
 		return 0;
 
-	for_each_possible_cpu(cpu)
-		init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu));
 	return 1;
 }
 
@@ -2391,6 +2366,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 
 	if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
 		return 1;
+
 	free_kmem_cache_nodes(s);
 error:
 	if (flags & SLAB_PANIC)
@@ -3247,22 +3223,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 	down_write(&slub_lock);
 	s = find_mergeable(size, align, flags, name, ctor);
 	if (s) {
-		int cpu;
-
 		s->refcount++;
 		/*
 		 * Adjust the object sizes so that we clear
 		 * the complete object on kzalloc.
 		 */
 		s->objsize = max(s->objsize, (int)size);
-
-		/*
-		 * And then we need to update the object size in the
-		 * per cpu structures
-		 */
-		for_each_online_cpu(cpu)
-			per_cpu_ptr(s->cpu_slab, cpu)->objsize = s->objsize;
-
 		s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
 		up_write(&slub_lock);
 
@@ -3316,14 +3282,6 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
 	unsigned long flags;
 
 	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		down_read(&slub_lock);
-		list_for_each_entry(s, &slab_caches, list)
-			init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu));
-		up_read(&slub_lock);
-		break;
-
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD: