author     Linus Torvalds <torvalds@linux-foundation.org>  2011-03-22 12:36:23 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-03-22 12:36:23 -0400
commit     14577beb8293c187a12d2e78ac6250d5dcec2190 (patch)
tree       cca4d8feba497870e3a5936ba2a65c68ce4ddf6b
parent     09b9cc44c942256026bf7a63fec2155b8f488899 (diff)
parent     e8c500c2b64b6e237e67ecba7249e72363c47047 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
  slub: Dont define useless label in the !CONFIG_CMPXCHG_LOCAL case
  slab,rcu: don't assume the size of struct rcu_head
  slub,rcu: don't assume the size of struct rcu_head
  slub: automatically reserve bytes at the end of slab
  Lockless (and preemptless) fastpaths for slub
  slub: Get rid of slab_free_hook_irq()
  slub: min_partial needs to be in first cacheline
  slub: fix ksize() build error
  slub: fix kmemcheck calls to match ksize() hints
  Revert "slab: Fix missing DEBUG_SLAB last user"
  mm: Remove support for kmem_cache_name()
-rw-r--r--  include/linux/slab.h        1
-rw-r--r--  include/linux/slub_def.h    8
-rw-r--r--  mm/slab.c                   55
-rw-r--r--  mm/slob.c                   6
-rw-r--r--  mm/slub.c                   366
5 files changed, 333 insertions, 103 deletions
diff --git a/include/linux/slab.h b/include/linux/slab.h
index fa9086647eb7..ad4dd1c8d30a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -105,7 +105,6 @@ void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
 unsigned int kmem_cache_size(struct kmem_cache *);
-const char *kmem_cache_name(struct kmem_cache *);
 
 /*
  * Please use this macro to create slab caches. Simply specify the
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 8b6e8ae5d5ca..90fbb6d87e11 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -35,7 +35,10 @@ enum stat_item {
         NR_SLUB_STAT_ITEMS };
 
 struct kmem_cache_cpu {
-        void **freelist;        /* Pointer to first free per cpu object */
+        void **freelist;        /* Pointer to next available object */
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long tid;      /* Globally unique transaction id */
+#endif
         struct page *page;      /* The slab from which we are allocating */
         int node;               /* The node of the page (or -1 for debug) */
 #ifdef CONFIG_SLUB_STATS
@@ -70,6 +73,7 @@ struct kmem_cache {
         struct kmem_cache_cpu __percpu *cpu_slab;
         /* Used for retriving partial slabs etc */
         unsigned long flags;
+        unsigned long min_partial;
         int size;               /* The size of an object including meta data */
         int objsize;            /* The size of an object without meta data */
         int offset;             /* Free pointer offset. */
@@ -83,7 +87,7 @@ struct kmem_cache {
         void (*ctor)(void *);
         int inuse;              /* Offset to metadata */
         int align;              /* Alignment */
-        unsigned long min_partial;
+        int reserved;           /* Reserved bytes at the end of slabs */
         const char *name;       /* Name (only for display!) */
         struct list_head list;  /* List of slab caches */
 #ifdef CONFIG_SYSFS
diff --git a/mm/slab.c b/mm/slab.c
index 37961d1f584f..a18ba57517af 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -191,22 +191,6 @@ typedef unsigned int kmem_bufctl_t;
 #define SLAB_LIMIT      (((kmem_bufctl_t)(~0U))-3)
 
 /*
- * struct slab
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
- */
-struct slab {
-        struct list_head list;
-        unsigned long colouroff;
-        void *s_mem;            /* including colour offset */
-        unsigned int inuse;     /* num of objs active in slab */
-        kmem_bufctl_t free;
-        unsigned short nodeid;
-};
-
-/*
  * struct slab_rcu
  *
  * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
@@ -219,8 +203,6 @@ struct slab {
  *
  * rcu_read_lock before reading the address, then rcu_read_unlock after
  * taking the spinlock within the structure expected at that address.
- *
- * We assume struct slab_rcu can overlay struct slab when destroying.
  */
 struct slab_rcu {
         struct rcu_head head;
@@ -229,6 +211,27 @@ struct slab_rcu {
 };
 
 /*
+ * struct slab
+ *
+ * Manages the objs in a slab. Placed either at the beginning of mem allocated
+ * for a slab, or allocated from an general cache.
+ * Slabs are chained into three list: fully used, partial, fully free slabs.
+ */
+struct slab {
+        union {
+                struct {
+                        struct list_head list;
+                        unsigned long colouroff;
+                        void *s_mem;            /* including colour offset */
+                        unsigned int inuse;     /* num of objs active in slab */
+                        kmem_bufctl_t free;
+                        unsigned short nodeid;
+                };
+                struct slab_rcu __slab_cover_slab_rcu;
+        };
+};
+
+/*
  * struct array_cache
  *
  * Purpose:
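
The hunk above replaces the old implicit assumption that struct slab_rcu can overlay struct slab with an explicit anonymous union, so the compiler sizes struct slab to cover both views. A minimal userspace sketch of the same idea, with simplified stand-in field types rather than the kernel definitions:

#include <stdio.h>

/* Stand-in for the kernel's slab_rcu (rcu_head plus bookkeeping fields). */
struct slab_rcu {
        void *next;
        void (*func)(void *);
        void *cachep;
        unsigned long addr;
};

struct slab {
        union {
                struct {                        /* normal management view */
                        void *list_prev, *list_next;
                        unsigned long colouroff;
                        void *s_mem;
                        unsigned int inuse;
                        unsigned int free;
                        unsigned short nodeid;
                };
                struct slab_rcu __slab_cover_slab_rcu;  /* RCU-destroy view */
        };
};

int main(void)
{
        /* The union guarantees the struct is large enough for either view,
         * so no "can overlay" size assumption is needed any more. */
        printf("sizeof(struct slab)=%zu, sizeof(struct slab_rcu)=%zu\n",
               sizeof(struct slab), sizeof(struct slab_rcu));
        return 0;
}
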
@@ -2147,8 +2150,6 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
  *
  * @name must be valid until the cache is destroyed. This implies that
  * the module calling this has to destroy the cache before getting unloaded.
- * Note that kmem_cache_name() is not guaranteed to return the same pointer,
- * therefore applications must manage it themselves.
  *
  * The flags are
  *
@@ -2288,8 +2289,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         if (ralign < align) {
                 ralign = align;
         }
-        /* disable debug if not aligning with REDZONE_ALIGN */
-        if (ralign & (__alignof__(unsigned long long) - 1))
+        /* disable debug if necessary */
+        if (ralign > __alignof__(unsigned long long))
                 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
         /*
          * 4) Store it.
@@ -2315,8 +2316,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
          */
         if (flags & SLAB_RED_ZONE) {
                 /* add space for red zone words */
-                cachep->obj_offset += align;
-                size += align + sizeof(unsigned long long);
+                cachep->obj_offset += sizeof(unsigned long long);
+                size += 2 * sizeof(unsigned long long);
         }
         if (flags & SLAB_STORE_USER) {
                 /* user store requires one word storage behind the end of
@@ -3840,12 +3841,6 @@ unsigned int kmem_cache_size(struct kmem_cache *cachep)
 }
 EXPORT_SYMBOL(kmem_cache_size);
 
-const char *kmem_cache_name(struct kmem_cache *cachep)
-{
-        return cachep->name;
-}
-EXPORT_SYMBOL_GPL(kmem_cache_name);
-
 /*
  * This initializes kmem_list3 or resizes various caches for all nodes.
  */
diff --git a/mm/slob.c b/mm/slob.c
index 3588eaaef726..46e0aee33a23 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -666,12 +666,6 @@ unsigned int kmem_cache_size(struct kmem_cache *c)
 }
 EXPORT_SYMBOL(kmem_cache_size);
 
-const char *kmem_cache_name(struct kmem_cache *c)
-{
-        return c->name;
-}
-EXPORT_SYMBOL(kmem_cache_name);
-
 int kmem_cache_shrink(struct kmem_cache *d)
 {
         return 0;
diff --git a/mm/slub.c b/mm/slub.c
index e15aa7f193c9..7e4f835e32ab 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -281,11 +281,40 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
         return (p - addr) / s->size;
 }
 
+static inline size_t slab_ksize(const struct kmem_cache *s)
+{
+#ifdef CONFIG_SLUB_DEBUG
+        /*
+         * Debugging requires use of the padding between object
+         * and whatever may come after it.
+         */
+        if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
+                return s->objsize;
+
+#endif
+        /*
+         * If we have the need to store the freelist pointer
+         * back there or track user information then we can
+         * only use the space before that information.
+         */
+        if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
+                return s->inuse;
+        /*
+         * Else we can use all the padding etc for the allocation
+         */
+        return s->size;
+}
+
+static inline int order_objects(int order, unsigned long size, int reserved)
+{
+        return ((PAGE_SIZE << order) - reserved) / size;
+}
+
 static inline struct kmem_cache_order_objects oo_make(int order,
-                unsigned long size)
+                unsigned long size, int reserved)
 {
         struct kmem_cache_order_objects x = {
-                (order << OO_SHIFT) + (PAGE_SIZE << order) / size
+                (order << OO_SHIFT) + order_objects(order, size, reserved)
         };
 
         return x;
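
The order_objects() helper added above is what makes the new 'reserved' field take effect: any bytes reserved at the end of the slab simply stop counting toward its object capacity. A quick userspace sketch of that arithmetic; the PAGE_SIZE of 4096 and the 16-byte rcu_head size are assumed values for illustration, not taken from the patch:

#include <stdio.h>

#define PAGE_SIZE 4096UL   /* assumed; the kernel value is per-arch */

static int order_objects(int order, unsigned long size, int reserved)
{
        return ((PAGE_SIZE << order) - reserved) / size;
}

int main(void)
{
        /* 256-byte objects in an order-0 slab:
         *   no reservation:              4096 / 256        = 16 objects
         *   16 bytes kept for rcu_head:  (4096 - 16) / 256 = 15 objects */
        printf("%d %d\n", order_objects(0, 256, 0), order_objects(0, 256, 16));
        return 0;
}
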
@@ -617,7 +646,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
                 return 1;
 
         start = page_address(page);
-        length = (PAGE_SIZE << compound_order(page));
+        length = (PAGE_SIZE << compound_order(page)) - s->reserved;
         end = start + length;
         remainder = length % s->size;
         if (!remainder)
@@ -698,7 +727,7 @@ static int check_slab(struct kmem_cache *s, struct page *page)
                 return 0;
         }
 
-        maxobj = (PAGE_SIZE << compound_order(page)) / s->size;
+        maxobj = order_objects(compound_order(page), s->size, s->reserved);
         if (page->objects > maxobj) {
                 slab_err(s, page, "objects %u > max %u",
                         s->name, page->objects, maxobj);
@@ -748,7 +777,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
                 nr++;
         }
 
-        max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
+        max_objects = order_objects(compound_order(page), s->size, s->reserved);
         if (max_objects > MAX_OBJS_PER_PAGE)
                 max_objects = MAX_OBJS_PER_PAGE;
 
@@ -800,21 +829,31 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
 {
         flags &= gfp_allowed_mask;
-        kmemcheck_slab_alloc(s, flags, object, s->objsize);
+        kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
         kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags);
 }
 
 static inline void slab_free_hook(struct kmem_cache *s, void *x)
 {
         kmemleak_free_recursive(x, s->flags);
-}
 
-static inline void slab_free_hook_irq(struct kmem_cache *s, void *object)
-{
-        kmemcheck_slab_free(s, object, s->objsize);
-        debug_check_no_locks_freed(object, s->objsize);
-        if (!(s->flags & SLAB_DEBUG_OBJECTS))
-                debug_check_no_obj_freed(object, s->objsize);
+        /*
+         * Trouble is that we may no longer disable interupts in the fast path
+         * So in order to make the debug calls that expect irqs to be
+         * disabled we need to disable interrupts temporarily.
+         */
+#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
+        {
+                unsigned long flags;
+
+                local_irq_save(flags);
+                kmemcheck_slab_free(s, x, s->objsize);
+                debug_check_no_locks_freed(x, s->objsize);
+                if (!(s->flags & SLAB_DEBUG_OBJECTS))
+                        debug_check_no_obj_freed(x, s->objsize);
+                local_irq_restore(flags);
+        }
+#endif
 }
 
 /*
@@ -1101,9 +1140,6 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
 
 static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
 
-static inline void slab_free_hook_irq(struct kmem_cache *s,
-                                                void *object) {}
-
 #endif /* CONFIG_SLUB_DEBUG */
 
 /*
@@ -1249,21 +1285,38 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
         __free_pages(page, order);
 }
 
+#define need_reserve_slab_rcu                                           \
+        (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
+
 static void rcu_free_slab(struct rcu_head *h)
 {
         struct page *page;
 
-        page = container_of((struct list_head *)h, struct page, lru);
+        if (need_reserve_slab_rcu)
+                page = virt_to_head_page(h);
+        else
+                page = container_of((struct list_head *)h, struct page, lru);
+
         __free_slab(page->slab, page);
 }
 
 static void free_slab(struct kmem_cache *s, struct page *page)
 {
         if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
-                /*
-                 * RCU free overloads the RCU head over the LRU
-                 */
-                struct rcu_head *head = (void *)&page->lru;
+                struct rcu_head *head;
+
+                if (need_reserve_slab_rcu) {
+                        int order = compound_order(page);
+                        int offset = (PAGE_SIZE << order) - s->reserved;
+
+                        VM_BUG_ON(s->reserved != sizeof(*head));
+                        head = page_address(page) + offset;
+                } else {
+                        /*
+                         * RCU free overloads the RCU head over the LRU
+                         */
+                        head = (void *)&page->lru;
+                }
 
                 call_rcu(head, rcu_free_slab);
         } else
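
When sizeof(struct rcu_head) no longer fits in page->lru, the hunk above parks the head in the bytes reserved at the very end of the slab instead of overlaying the page structure. A small standalone sketch of that placement computation; the PAGE_SIZE value and the two-pointer rcu_head layout are assumptions for illustration, not kernel definitions:

#include <stdio.h>

#define PAGE_SIZE 4096UL   /* assumed page size */

/* Stand-in with the classic two-pointer layout. */
struct rcu_head {
        struct rcu_head *next;
        void (*func)(struct rcu_head *);
};

int main(void)
{
        int order = 1;                                  /* a two-page slab */
        unsigned long reserved = sizeof(struct rcu_head);
        unsigned long offset = (PAGE_SIZE << order) - reserved;

        /* The kernel then does: head = page_address(page) + offset; */
        printf("slab of %lu bytes, rcu_head placed at offset %lu\n",
               PAGE_SIZE << order, offset);
        return 0;
}
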
@@ -1487,6 +1540,77 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
         }
 }
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+#ifdef CONFIG_PREEMPT
+/*
+ * Calculate the next globally unique transaction for disambiguiation
+ * during cmpxchg. The transactions start with the cpu number and are then
+ * incremented by CONFIG_NR_CPUS.
+ */
+#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
+#else
+/*
+ * No preemption supported therefore also no need to check for
+ * different cpus.
+ */
+#define TID_STEP 1
+#endif
+
+static inline unsigned long next_tid(unsigned long tid)
+{
+        return tid + TID_STEP;
+}
+
+static inline unsigned int tid_to_cpu(unsigned long tid)
+{
+        return tid % TID_STEP;
+}
+
+static inline unsigned long tid_to_event(unsigned long tid)
+{
+        return tid / TID_STEP;
+}
+
+static inline unsigned int init_tid(int cpu)
+{
+        return cpu;
+}
+
+static inline void note_cmpxchg_failure(const char *n,
+                const struct kmem_cache *s, unsigned long tid)
+{
+#ifdef SLUB_DEBUG_CMPXCHG
+        unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
+
+        printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
+
+#ifdef CONFIG_PREEMPT
+        if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
+                printk("due to cpu change %d -> %d\n",
+                        tid_to_cpu(tid), tid_to_cpu(actual_tid));
+        else
+#endif
+        if (tid_to_event(tid) != tid_to_event(actual_tid))
+                printk("due to cpu running other code. Event %ld->%ld\n",
+                        tid_to_event(tid), tid_to_event(actual_tid));
+        else
+                printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
+                        actual_tid, tid, next_tid(tid));
+#endif
+}
+
+#endif
+
+void init_kmem_cache_cpus(struct kmem_cache *s)
+{
+#if defined(CONFIG_CMPXCHG_LOCAL) && defined(CONFIG_PREEMPT)
+        int cpu;
+
+        for_each_possible_cpu(cpu)
+                per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
+#endif
+
+}
 /*
  * Remove the cpu slab
  */
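
The transaction id introduced above encodes both the owning cpu (low bits) and a per-cpu operation counter (high bits), which is what lets the cmpxchg-based fastpath detect preemption, migration and interleaved operations without disabling interrupts. A userspace sketch of just that encoding; the NR_CPUS value is an assumption for illustration:

#include <stdio.h>

#define NR_CPUS  64                     /* assumed; already a power of two */
#define TID_STEP NR_CPUS

static unsigned long init_tid(int cpu)               { return cpu; }
static unsigned long next_tid(unsigned long tid)     { return tid + TID_STEP; }
static unsigned int  tid_to_cpu(unsigned long tid)   { return tid % TID_STEP; }
static unsigned long tid_to_event(unsigned long tid) { return tid / TID_STEP; }

int main(void)
{
        unsigned long tid = init_tid(5);        /* cpu 5, event 0 */

        tid = next_tid(tid);                    /* one fastpath op later */
        tid = next_tid(tid);                    /* ...and another */

        /* Still cpu 5, but the event counter has advanced to 2. */
        printf("cpu=%u event=%lu\n", tid_to_cpu(tid), tid_to_event(tid));
        return 0;
}
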
@@ -1518,6 +1642,9 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
                 page->inuse--;
         }
         c->page = NULL;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        c->tid = next_tid(c->tid);
+#endif
         unfreeze_slab(s, page, tail);
 }
 
@@ -1652,6 +1779,19 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
         void **object;
         struct page *new;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long flags;
+
+        local_irq_save(flags);
+#ifdef CONFIG_PREEMPT
+        /*
+         * We may have been preempted and rescheduled on a different
+         * cpu before disabling interrupts. Need to reload cpu area
+         * pointer.
+         */
+        c = this_cpu_ptr(s->cpu_slab);
+#endif
+#endif
 
         /* We handle __GFP_ZERO in the caller */
         gfpflags &= ~__GFP_ZERO;
@@ -1678,6 +1818,10 @@ load_freelist:
         c->node = page_to_nid(c->page);
 unlock_out:
         slab_unlock(c->page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        c->tid = next_tid(c->tid);
+        local_irq_restore(flags);
+#endif
         stat(s, ALLOC_SLOWPATH);
         return object;
 
@@ -1739,23 +1883,76 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 {
         void **object;
         struct kmem_cache_cpu *c;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long tid;
+#else
         unsigned long flags;
+#endif
 
         if (slab_pre_alloc_hook(s, gfpflags))
                 return NULL;
 
+#ifndef CONFIG_CMPXCHG_LOCAL
         local_irq_save(flags);
+#else
+redo:
+#endif
+
+        /*
+         * Must read kmem_cache cpu data via this cpu ptr. Preemption is
+         * enabled. We may switch back and forth between cpus while
+         * reading from one cpu area. That does not matter as long
+         * as we end up on the original cpu again when doing the cmpxchg.
+         */
         c = __this_cpu_ptr(s->cpu_slab);
+
+#ifdef CONFIG_CMPXCHG_LOCAL
+        /*
+         * The transaction ids are globally unique per cpu and per operation on
+         * a per cpu queue. Thus they can be guarantee that the cmpxchg_double
+         * occurs on the right processor and that there was no operation on the
+         * linked list in between.
+         */
+        tid = c->tid;
+        barrier();
+#endif
+
         object = c->freelist;
         if (unlikely(!object || !node_match(c, node)))
 
                 object = __slab_alloc(s, gfpflags, node, addr, c);
 
         else {
+#ifdef CONFIG_CMPXCHG_LOCAL
+                /*
+                 * The cmpxchg will only match if there was no additonal
+                 * operation and if we are on the right processor.
+                 *
+                 * The cmpxchg does the following atomically (without lock semantics!)
+                 * 1. Relocate first pointer to the current per cpu area.
+                 * 2. Verify that tid and freelist have not been changed
+                 * 3. If they were not changed replace tid and freelist
+                 *
+                 * Since this is without lock semantics the protection is only against
+                 * code executing on this cpu *not* from access by other cpus.
+                 */
+                if (unlikely(!this_cpu_cmpxchg_double(
+                                s->cpu_slab->freelist, s->cpu_slab->tid,
+                                object, tid,
+                                get_freepointer(s, object), next_tid(tid)))) {
+
+                        note_cmpxchg_failure("slab_alloc", s, tid);
+                        goto redo;
+                }
+#else
                 c->freelist = get_freepointer(s, object);
+#endif
                 stat(s, ALLOC_FASTPATH);
         }
+
+#ifndef CONFIG_CMPXCHG_LOCAL
         local_irq_restore(flags);
+#endif
 
         if (unlikely(gfpflags & __GFP_ZERO) && object)
                 memset(object, 0, s->objsize);
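
The fastpath above follows the classic optimistic pattern: snapshot the per-cpu freelist and tid, compute the new list head, then publish both with one this_cpu_cmpxchg_double() and retry from 'redo' if anything changed underneath. The sketch below shows only that snapshot/CAS/retry skeleton with a single-word C11 compare-and-swap; it is an illustration, not the kernel code, and a single-word CAS cannot provide the ABA and same-cpu guarantees that pairing the pointer with the tid gives the real implementation.

#include <stdatomic.h>
#include <stdio.h>

struct object { struct object *next; };

static _Atomic(struct object *) freelist;

static struct object *pop(void)
{
        struct object *old, *new;

        do {
                old = atomic_load(&freelist);   /* snapshot, like freelist + tid */
                if (!old)
                        return NULL;            /* kernel falls back to the slow path */
                new = old->next;
        } while (!atomic_compare_exchange_weak(&freelist, &old, new));

        return old;                             /* published atomically, no lock taken */
}

int main(void)
{
        struct object a = { NULL }, b = { &a };

        atomic_store(&freelist, &b);
        printf("%p %p\n", (void *)pop(), (void *)pop());
        return 0;
}
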
@@ -1833,9 +2030,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 {
         void *prior;
         void **object = (void *)x;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long flags;
 
-        stat(s, FREE_SLOWPATH);
+        local_irq_save(flags);
+#endif
         slab_lock(page);
+        stat(s, FREE_SLOWPATH);
 
         if (kmem_cache_debug(s))
                 goto debug;
@@ -1865,6 +2066,9 @@ checks_ok:
 
 out_unlock:
         slab_unlock(page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        local_irq_restore(flags);
+#endif
         return;
 
 slab_empty:
@@ -1876,6 +2080,9 @@ slab_empty:
                 stat(s, FREE_REMOVE_PARTIAL);
         }
         slab_unlock(page);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        local_irq_restore(flags);
+#endif
         stat(s, FREE_SLAB);
         discard_slab(s, page);
         return;
@@ -1902,23 +2109,56 @@ static __always_inline void slab_free(struct kmem_cache *s,
 {
         void **object = (void *)x;
         struct kmem_cache_cpu *c;
+#ifdef CONFIG_CMPXCHG_LOCAL
+        unsigned long tid;
+#else
         unsigned long flags;
+#endif
 
         slab_free_hook(s, x);
 
+#ifndef CONFIG_CMPXCHG_LOCAL
         local_irq_save(flags);
+
+#else
+redo:
+#endif
+
+        /*
+         * Determine the currently cpus per cpu slab.
+         * The cpu may change afterward. However that does not matter since
+         * data is retrieved via this pointer. If we are on the same cpu
+         * during the cmpxchg then the free will succedd.
+         */
         c = __this_cpu_ptr(s->cpu_slab);
 
-        slab_free_hook_irq(s, x);
+#ifdef CONFIG_CMPXCHG_LOCAL
+        tid = c->tid;
+        barrier();
+#endif
 
         if (likely(page == c->page && c->node != NUMA_NO_NODE)) {
                 set_freepointer(s, object, c->freelist);
+
+#ifdef CONFIG_CMPXCHG_LOCAL
+                if (unlikely(!this_cpu_cmpxchg_double(
+                                s->cpu_slab->freelist, s->cpu_slab->tid,
+                                c->freelist, tid,
+                                object, next_tid(tid)))) {
+
+                        note_cmpxchg_failure("slab_free", s, tid);
+                        goto redo;
+                }
+#else
                 c->freelist = object;
+#endif
                 stat(s, FREE_FASTPATH);
         } else
                 __slab_free(s, page, x, addr);
 
+#ifndef CONFIG_CMPXCHG_LOCAL
         local_irq_restore(flags);
+#endif
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -1988,13 +2228,13 @@ static int slub_nomerge;
  * the smallest order which will fit the object.
  */
 static inline int slab_order(int size, int min_objects,
-                                int max_order, int fract_leftover)
+                                int max_order, int fract_leftover, int reserved)
 {
         int order;
         int rem;
         int min_order = slub_min_order;
 
-        if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE)
+        if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
                 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
 
         for (order = max(min_order,
@@ -2003,10 +2243,10 @@ static inline int slab_order(int size, int min_objects,
 
                 unsigned long slab_size = PAGE_SIZE << order;
 
-                if (slab_size < min_objects * size)
+                if (slab_size < min_objects * size + reserved)
                         continue;
 
-                rem = slab_size % size;
+                rem = (slab_size - reserved) % size;
 
                 if (rem <= slab_size / fract_leftover)
                         break;
@@ -2016,7 +2256,7 @@ static inline int slab_order(int size, int min_objects,
         return order;
 }
 
-static inline int calculate_order(int size)
+static inline int calculate_order(int size, int reserved)
 {
         int order;
         int min_objects;
@@ -2034,14 +2274,14 @@ static inline int calculate_order(int size)
         min_objects = slub_min_objects;
         if (!min_objects)
                 min_objects = 4 * (fls(nr_cpu_ids) + 1);
-        max_objects = (PAGE_SIZE << slub_max_order)/size;
+        max_objects = order_objects(slub_max_order, size, reserved);
         min_objects = min(min_objects, max_objects);
 
         while (min_objects > 1) {
                 fraction = 16;
                 while (fraction >= 4) {
                         order = slab_order(size, min_objects,
-                                        slub_max_order, fraction);
+                                        slub_max_order, fraction, reserved);
                         if (order <= slub_max_order)
                                 return order;
                         fraction /= 2;
@@ -2053,14 +2293,14 @@ static inline int calculate_order(int size)
          * We were unable to place multiple objects in a slab. Now
          * lets see if we can place a single object there.
          */
-        order = slab_order(size, 1, slub_max_order, 1);
+        order = slab_order(size, 1, slub_max_order, 1, reserved);
         if (order <= slub_max_order)
                 return order;
 
         /*
          * Doh this slab cannot be placed using slub_max_order.
          */
-        order = slab_order(size, 1, MAX_ORDER, 1);
+        order = slab_order(size, 1, MAX_ORDER, 1, reserved);
         if (order < MAX_ORDER)
                 return order;
         return -ENOSYS;
@@ -2110,9 +2350,23 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
         BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
                         SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+        /*
+         * Must align to double word boundary for the double cmpxchg instructions
+         * to work.
+         */
+        s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *));
+#else
+        /* Regular alignment is sufficient */
         s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
+#endif
 
-        return s->cpu_slab != NULL;
+        if (!s->cpu_slab)
+                return 0;
+
+        init_kmem_cache_cpus(s);
+
+        return 1;
 }
 
 static struct kmem_cache *kmem_cache_node;
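
With the lockless fastpath enabled, the per-cpu kmem_cache_cpu is allocated with 2 * sizeof(void *) alignment above because the double-width cmpxchg operates on the freelist/tid pair as one naturally aligned two-word unit. A hedged userspace illustration of that layout constraint using C11 alignment; the struct here is a simplified stand-in, not the kernel's:

#include <assert.h>
#include <stdalign.h>
#include <stdio.h>

struct fastpath_pair {
        void *freelist;         /* first word swapped by the double cmpxchg */
        unsigned long tid;      /* second word, swapped in the same operation */
};

int main(void)
{
        alignas(2 * sizeof(void *)) struct fastpath_pair p = { NULL, 0 };

        static_assert(sizeof(struct fastpath_pair) == 2 * sizeof(void *),
                      "pair must be exactly two words for a double cmpxchg");
        printf("%zu-byte aligned pair at %p\n", 2 * sizeof(void *), (void *)&p);
        return 0;
}
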
@@ -2311,7 +2565,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
         if (forced_order >= 0)
                 order = forced_order;
         else
-                order = calculate_order(size);
+                order = calculate_order(size, s->reserved);
 
         if (order < 0)
                 return 0;
@@ -2329,8 +2583,8 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
         /*
          * Determine the number of objects per slab
          */
-        s->oo = oo_make(order, size);
-        s->min = oo_make(get_order(size), size);
+        s->oo = oo_make(order, size, s->reserved);
+        s->min = oo_make(get_order(size), size, s->reserved);
         if (oo_objects(s->oo) > oo_objects(s->max))
                 s->max = s->oo;
 
@@ -2349,6 +2603,10 @@ static int kmem_cache_open(struct kmem_cache *s,
         s->objsize = size;
         s->align = align;
         s->flags = kmem_cache_flags(size, flags, name, ctor);
+        s->reserved = 0;
+
+        if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
+                s->reserved = sizeof(struct rcu_head);
 
         if (!calculate_sizes(s, -1))
                 goto error;
@@ -2399,12 +2657,6 @@ unsigned int kmem_cache_size(struct kmem_cache *s)
 }
 EXPORT_SYMBOL(kmem_cache_size);
 
-const char *kmem_cache_name(struct kmem_cache *s)
-{
-        return s->name;
-}
-EXPORT_SYMBOL(kmem_cache_name);
-
 static void list_slab_objects(struct kmem_cache *s, struct page *page,
                                                         const char *text)
 {
@@ -2696,7 +2948,6 @@ EXPORT_SYMBOL(__kmalloc_node);
 size_t ksize(const void *object)
 {
         struct page *page;
-        struct kmem_cache *s;
 
         if (unlikely(object == ZERO_SIZE_PTR))
                 return 0;
@@ -2707,28 +2958,8 @@ size_t ksize(const void *object)
                 WARN_ON(!PageCompound(page));
                 return PAGE_SIZE << compound_order(page);
         }
-        s = page->slab;
 
-#ifdef CONFIG_SLUB_DEBUG
-        /*
-         * Debugging requires use of the padding between object
-         * and whatever may come after it.
-         */
-        if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
-                return s->objsize;
-
-#endif
-        /*
-         * If we have the need to store the freelist pointer
-         * back there or track user information then we can
-         * only use the space before that information.
-         */
-        if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
-                return s->inuse;
-        /*
-         * Else we can use all the padding etc for the allocation
-         */
-        return s->size;
+        return slab_ksize(page->slab);
 }
 EXPORT_SYMBOL(ksize);
 
@@ -4017,6 +4248,12 @@ static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
 }
 SLAB_ATTR_RO(destroy_by_rcu);
 
+static ssize_t reserved_show(struct kmem_cache *s, char *buf)
+{
+        return sprintf(buf, "%d\n", s->reserved);
+}
+SLAB_ATTR_RO(reserved);
+
 #ifdef CONFIG_SLUB_DEBUG
 static ssize_t slabs_show(struct kmem_cache *s, char *buf)
 {
@@ -4303,6 +4540,7 @@ static struct attribute *slab_attrs[] = {
         &reclaim_account_attr.attr,
         &destroy_by_rcu_attr.attr,
         &shrink_attr.attr,
+        &reserved_attr.attr,
 #ifdef CONFIG_SLUB_DEBUG
         &total_objects_attr.attr,
         &slabs_attr.attr,