author    Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
commit    c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree      ecb166cb3e2b7e2adb3b5e292245fefd23381ac8  /mm/slub.c
parent    ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent    6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp (wip-k-fmlp)
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'mm/slub.c')
-rw-r--r--  mm/slub.c  1283
1 file changed, 752 insertions(+), 531 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 13fffe1f0f3d..35f351f26193 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -28,6 +28,8 @@
28#include <linux/math64.h> 28#include <linux/math64.h>
29#include <linux/fault-inject.h> 29#include <linux/fault-inject.h>
30 30
31#include <trace/events/kmem.h>
32
31/* 33/*
32 * Lock order: 34 * Lock order:
33 * 1. slab_lock(page) 35 * 1. slab_lock(page)
@@ -62,7 +64,7 @@
62 * we must stay away from it for a while since we may cause a bouncing 64 * we must stay away from it for a while since we may cause a bouncing
63 * cacheline if we try to acquire the lock. So go onto the next slab. 65 * cacheline if we try to acquire the lock. So go onto the next slab.
64 * If all pages are busy then we may allocate a new slab instead of reusing 66 * If all pages are busy then we may allocate a new slab instead of reusing
65 * a partial slab. A new slab has noone operating on it and thus there is 67 * a partial slab. A new slab has no one operating on it and thus there is
66 * no danger of cacheline contention. 68 * no danger of cacheline contention.
67 * 69 *
68 * Interrupts are disabled during allocation and deallocation in order to 70 * Interrupts are disabled during allocation and deallocation in order to
@@ -168,7 +170,6 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
168 170
169/* Internal SLUB flags */ 171/* Internal SLUB flags */
170#define __OBJECT_POISON 0x80000000UL /* Poison object */ 172#define __OBJECT_POISON 0x80000000UL /* Poison object */
171#define __SYSFS_ADD_DEFERRED 0x40000000UL /* Not yet visible via sysfs */
172 173
173static int kmem_size = sizeof(struct kmem_cache); 174static int kmem_size = sizeof(struct kmem_cache);
174 175
@@ -178,7 +179,7 @@ static struct notifier_block slab_notifier;
178 179
179static enum { 180static enum {
180 DOWN, /* No slab functionality available */ 181 DOWN, /* No slab functionality available */
181 PARTIAL, /* kmem_cache_open() works but kmalloc does not */ 182 PARTIAL, /* Kmem_cache_node works */
182 UP, /* Everything works but does not show up in sysfs */ 183 UP, /* Everything works but does not show up in sysfs */
183 SYSFS /* Sysfs up */ 184 SYSFS /* Sysfs up */
184} slab_state = DOWN; 185} slab_state = DOWN;
@@ -199,7 +200,7 @@ struct track {
199 200
200enum track_item { TRACK_ALLOC, TRACK_FREE }; 201enum track_item { TRACK_ALLOC, TRACK_FREE };
201 202
202#ifdef CONFIG_SLUB_DEBUG 203#ifdef CONFIG_SYSFS
203static int sysfs_slab_add(struct kmem_cache *); 204static int sysfs_slab_add(struct kmem_cache *);
204static int sysfs_slab_alias(struct kmem_cache *, const char *); 205static int sysfs_slab_alias(struct kmem_cache *, const char *);
205static void sysfs_slab_remove(struct kmem_cache *); 206static void sysfs_slab_remove(struct kmem_cache *);
@@ -210,12 +211,13 @@ static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
210 { return 0; } 211 { return 0; }
211static inline void sysfs_slab_remove(struct kmem_cache *s) 212static inline void sysfs_slab_remove(struct kmem_cache *s)
212{ 213{
214 kfree(s->name);
213 kfree(s); 215 kfree(s);
214} 216}
215 217
216#endif 218#endif
217 219
218static inline void stat(struct kmem_cache *s, enum stat_item si) 220static inline void stat(const struct kmem_cache *s, enum stat_item si)
219{ 221{
220#ifdef CONFIG_SLUB_STATS 222#ifdef CONFIG_SLUB_STATS
221 __this_cpu_inc(s->cpu_slab->stat[si]); 223 __this_cpu_inc(s->cpu_slab->stat[si]);
@@ -233,11 +235,7 @@ int slab_is_available(void)
233 235
234static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) 236static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
235{ 237{
236#ifdef CONFIG_NUMA
237 return s->node[node]; 238 return s->node[node];
238#else
239 return &s->local_node;
240#endif
241} 239}
242 240
243/* Verify that a pointer has an address that is valid within a slab page */ 241/* Verify that a pointer has an address that is valid within a slab page */
@@ -263,6 +261,18 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object)
263 return *(void **)(object + s->offset); 261 return *(void **)(object + s->offset);
264} 262}
265 263
264static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
265{
266 void *p;
267
268#ifdef CONFIG_DEBUG_PAGEALLOC
269 probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p));
270#else
271 p = get_freepointer(s, object);
272#endif
273 return p;
274}
275
266static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) 276static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
267{ 277{
268 *(void **)(object + s->offset) = fp; 278 *(void **)(object + s->offset) = fp;
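A note on the freelist helpers touched above: SLUB chains free objects through a pointer stored inside each free object at byte offset s->offset; get_freepointer() reads that link, set_freepointer() writes it, and the new get_freepointer_safe() only adds a fault-tolerant read under CONFIG_DEBUG_PAGEALLOC. The following userspace sketch models that intrusive freelist; the toy_cache type and the sizes are illustrative assumptions, not kernel API.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for struct kmem_cache: just the two fields the
 * freelist helpers need.  Names are made up for the example. */
struct toy_cache {
	size_t size;	/* total per-object size */
	size_t offset;	/* where the free pointer lives inside a free object */
};

static void *get_freepointer(struct toy_cache *s, void *object)
{
	return *(void **)((char *)object + s->offset);
}

static void set_freepointer(struct toy_cache *s, void *object, void *fp)
{
	*(void **)((char *)object + s->offset) = fp;
}

int main(void)
{
	struct toy_cache s = { .size = 64, .offset = 0 };
	char *slab = malloc(4 * s.size);	/* stands in for one slab page */
	void *freelist = NULL;

	if (!slab)
		return 1;

	/* Thread the four objects into a freelist, newest first. */
	for (int i = 3; i >= 0; i--) {
		void *obj = slab + i * s.size;
		set_freepointer(&s, obj, freelist);
		freelist = obj;
	}

	/* Allocation walks the chain, just like c->freelist = get_freepointer(). */
	for (int i = 0; i < 2; i++) {
		void *obj = freelist;
		freelist = get_freepointer(&s, obj);
		printf("allocated object at offset %td\n", (char *)obj - slab);
	}
	free(slab);
	return 0;
}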
@@ -273,21 +283,46 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
273 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ 283 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
274 __p += (__s)->size) 284 __p += (__s)->size)
275 285
276/* Scan freelist */
277#define for_each_free_object(__p, __s, __free) \
278 for (__p = (__free); __p; __p = get_freepointer((__s), __p))
279
280/* Determine object index from a given position */ 286/* Determine object index from a given position */
281static inline int slab_index(void *p, struct kmem_cache *s, void *addr) 287static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
282{ 288{
283 return (p - addr) / s->size; 289 return (p - addr) / s->size;
284} 290}
285 291
292static inline size_t slab_ksize(const struct kmem_cache *s)
293{
294#ifdef CONFIG_SLUB_DEBUG
295 /*
296 * Debugging requires use of the padding between object
297 * and whatever may come after it.
298 */
299 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
300 return s->objsize;
301
302#endif
303 /*
304 * If we have the need to store the freelist pointer
305 * back there or track user information then we can
306 * only use the space before that information.
307 */
308 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
309 return s->inuse;
310 /*
311 * Else we can use all the padding etc for the allocation
312 */
313 return s->size;
314}
315
316static inline int order_objects(int order, unsigned long size, int reserved)
317{
318 return ((PAGE_SIZE << order) - reserved) / size;
319}
320
286static inline struct kmem_cache_order_objects oo_make(int order, 321static inline struct kmem_cache_order_objects oo_make(int order,
287 unsigned long size) 322 unsigned long size, int reserved)
288{ 323{
289 struct kmem_cache_order_objects x = { 324 struct kmem_cache_order_objects x = {
290 (order << OO_SHIFT) + (PAGE_SIZE << order) / size 325 (order << OO_SHIFT) + order_objects(order, size, reserved)
291 }; 326 };
292 327
293 return x; 328 return x;
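The order_objects()/oo_make() change above subtracts s->reserved tail bytes from the usable slab space before dividing by the object size, and packs the order together with the object count into a single word. A small model of the arithmetic, assuming a 4 KiB page and the same OO_SHIFT packing:

#include <stdio.h>

#define TOY_PAGE_SIZE	4096UL	/* assumption: 4 KiB pages for the example */
#define OO_SHIFT	16	/* same packing as kmem_cache_order_objects */

/* Objects that fit in a slab of 2^order pages once 'reserved' tail bytes
 * are set aside. */
static unsigned long order_objects(int order, unsigned long size, int reserved)
{
	return ((TOY_PAGE_SIZE << order) - reserved) / size;
}

/* Pack order and object count into one word, as oo_make() does. */
static unsigned long oo_make(int order, unsigned long size, int reserved)
{
	return ((unsigned long)order << OO_SHIFT) +
		order_objects(order, size, reserved);
}

int main(void)
{
	unsigned long oo = oo_make(0, 256, 16);

	/* With 16 reserved bytes a 4 KiB slab of 256-byte objects holds 15
	 * objects; with reserved == 0 it would hold 16. */
	printf("order=%lu objects=%lu\n",
	       oo >> OO_SHIFT, oo & ((1UL << OO_SHIFT) - 1));
	return 0;
}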
@@ -305,6 +340,21 @@ static inline int oo_objects(struct kmem_cache_order_objects x)
305 340
306#ifdef CONFIG_SLUB_DEBUG 341#ifdef CONFIG_SLUB_DEBUG
307/* 342/*
343 * Determine a map of object in use on a page.
344 *
345 * Slab lock or node listlock must be held to guarantee that the page does
346 * not vanish from under us.
347 */
348static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
349{
350 void *p;
351 void *addr = page_address(page);
352
353 for (p = page->freelist; p; p = get_freepointer(s, p))
354 set_bit(slab_index(p, s, addr), map);
355}
356
357/*
308 * Debug settings: 358 * Debug settings:
309 */ 359 */
310#ifdef CONFIG_SLUB_DEBUG_ON 360#ifdef CONFIG_SLUB_DEBUG_ON
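get_map(), introduced above, replaces the old for_each_free_object() walks with a bitmap: each object still on page->freelist has its slab index bit set, so callers can then tell free from allocated objects with test_bit(). A minimal model of that idea (the free indexes below are made up for the example):

#include <stdio.h>

#define NOBJ 8

/* Mark which objects are free, given their indexes, then report the
 * allocated ones -- the same trick get_map() + for_each_object() use. */
int main(void)
{
	int free_index[] = { 1, 4, 6 };		/* objects still on the freelist */
	unsigned long map = 0;			/* one bit per object on the page */

	for (unsigned i = 0; i < sizeof(free_index) / sizeof(free_index[0]); i++)
		map |= 1UL << free_index[i];	/* set_bit(slab_index(...), map) */

	for (int i = 0; i < NOBJ; i++)
		if (!(map & (1UL << i)))
			printf("object %d is allocated\n", i);
	return 0;
}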
@@ -494,7 +544,7 @@ static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
494 dump_stack(); 544 dump_stack();
495} 545}
496 546
497static void init_object(struct kmem_cache *s, void *object, int active) 547static void init_object(struct kmem_cache *s, void *object, u8 val)
498{ 548{
499 u8 *p = object; 549 u8 *p = object;
500 550
@@ -504,9 +554,7 @@ static void init_object(struct kmem_cache *s, void *object, int active)
504 } 554 }
505 555
506 if (s->flags & SLAB_RED_ZONE) 556 if (s->flags & SLAB_RED_ZONE)
507 memset(p + s->objsize, 557 memset(p + s->objsize, val, s->inuse - s->objsize);
508 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE,
509 s->inuse - s->objsize);
510} 558}
511 559
512static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes) 560static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
@@ -621,7 +669,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
621 return 1; 669 return 1;
622 670
623 start = page_address(page); 671 start = page_address(page);
624 length = (PAGE_SIZE << compound_order(page)); 672 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
625 end = start + length; 673 end = start + length;
626 remainder = length % s->size; 674 remainder = length % s->size;
627 if (!remainder) 675 if (!remainder)
@@ -641,17 +689,14 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
641} 689}
642 690
643static int check_object(struct kmem_cache *s, struct page *page, 691static int check_object(struct kmem_cache *s, struct page *page,
644 void *object, int active) 692 void *object, u8 val)
645{ 693{
646 u8 *p = object; 694 u8 *p = object;
647 u8 *endobject = object + s->objsize; 695 u8 *endobject = object + s->objsize;
648 696
649 if (s->flags & SLAB_RED_ZONE) { 697 if (s->flags & SLAB_RED_ZONE) {
650 unsigned int red =
651 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE;
652
653 if (!check_bytes_and_report(s, page, object, "Redzone", 698 if (!check_bytes_and_report(s, page, object, "Redzone",
654 endobject, red, s->inuse - s->objsize)) 699 endobject, val, s->inuse - s->objsize))
655 return 0; 700 return 0;
656 } else { 701 } else {
657 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { 702 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
@@ -661,7 +706,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
661 } 706 }
662 707
663 if (s->flags & SLAB_POISON) { 708 if (s->flags & SLAB_POISON) {
664 if (!active && (s->flags & __OBJECT_POISON) && 709 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
665 (!check_bytes_and_report(s, page, p, "Poison", p, 710 (!check_bytes_and_report(s, page, p, "Poison", p,
666 POISON_FREE, s->objsize - 1) || 711 POISON_FREE, s->objsize - 1) ||
667 !check_bytes_and_report(s, page, p, "Poison", 712 !check_bytes_and_report(s, page, p, "Poison",
@@ -673,7 +718,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
673 check_pad_bytes(s, page, p); 718 check_pad_bytes(s, page, p);
674 } 719 }
675 720
676 if (!s->offset && active) 721 if (!s->offset && val == SLUB_RED_ACTIVE)
677 /* 722 /*
678 * Object and freepointer overlap. Cannot check 723 * Object and freepointer overlap. Cannot check
679 * freepointer while object is allocated. 724 * freepointer while object is allocated.
@@ -705,7 +750,7 @@ static int check_slab(struct kmem_cache *s, struct page *page)
705 return 0; 750 return 0;
706 } 751 }
707 752
708 maxobj = (PAGE_SIZE << compound_order(page)) / s->size; 753 maxobj = order_objects(compound_order(page), s->size, s->reserved);
709 if (page->objects > maxobj) { 754 if (page->objects > maxobj) {
710 slab_err(s, page, "objects %u > max %u", 755 slab_err(s, page, "objects %u > max %u",
711 s->name, page->objects, maxobj); 756 s->name, page->objects, maxobj);
@@ -755,7 +800,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
755 nr++; 800 nr++;
756 } 801 }
757 802
758 max_objects = (PAGE_SIZE << compound_order(page)) / s->size; 803 max_objects = order_objects(compound_order(page), s->size, s->reserved);
759 if (max_objects > MAX_OBJS_PER_PAGE) 804 if (max_objects > MAX_OBJS_PER_PAGE)
760 max_objects = MAX_OBJS_PER_PAGE; 805 max_objects = MAX_OBJS_PER_PAGE;
761 806
@@ -792,6 +837,49 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
792} 837}
793 838
794/* 839/*
840 * Hooks for other subsystems that check memory allocations. In a typical
841 * production configuration these hooks all should produce no code at all.
842 */
843static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
844{
845 flags &= gfp_allowed_mask;
846 lockdep_trace_alloc(flags);
847 might_sleep_if(flags & __GFP_WAIT);
848
849 return should_failslab(s->objsize, flags, s->flags);
850}
851
852static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
853{
854 flags &= gfp_allowed_mask;
855 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
856 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags);
857}
858
859static inline void slab_free_hook(struct kmem_cache *s, void *x)
860{
861 kmemleak_free_recursive(x, s->flags);
862
863 /*
864 * Trouble is that we may no longer disable interupts in the fast path
865 * So in order to make the debug calls that expect irqs to be
866 * disabled we need to disable interrupts temporarily.
867 */
868#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
869 {
870 unsigned long flags;
871
872 local_irq_save(flags);
873 kmemcheck_slab_free(s, x, s->objsize);
874 debug_check_no_locks_freed(x, s->objsize);
875 local_irq_restore(flags);
876 }
877#endif
878 if (!(s->flags & SLAB_DEBUG_OBJECTS))
879 debug_check_no_obj_freed(x, s->objsize);
880}
881
882/*
795 * Tracking of fully allocated slabs for debugging purposes. 883 * Tracking of fully allocated slabs for debugging purposes.
796 */ 884 */
797static void add_full(struct kmem_cache_node *n, struct page *page) 885static void add_full(struct kmem_cache_node *n, struct page *page)
@@ -838,7 +926,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
838 * dilemma by deferring the increment of the count during 926 * dilemma by deferring the increment of the count during
839 * bootstrap (see early_kmem_cache_node_alloc). 927 * bootstrap (see early_kmem_cache_node_alloc).
840 */ 928 */
841 if (!NUMA_BUILD || n) { 929 if (n) {
842 atomic_long_inc(&n->nr_slabs); 930 atomic_long_inc(&n->nr_slabs);
843 atomic_long_add(objects, &n->total_objects); 931 atomic_long_add(objects, &n->total_objects);
844 } 932 }
@@ -858,11 +946,11 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
858 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))) 946 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
859 return; 947 return;
860 948
861 init_object(s, object, 0); 949 init_object(s, object, SLUB_RED_INACTIVE);
862 init_tracking(s, object); 950 init_tracking(s, object);
863} 951}
864 952
865static int alloc_debug_processing(struct kmem_cache *s, struct page *page, 953static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page,
866 void *object, unsigned long addr) 954 void *object, unsigned long addr)
867{ 955{
868 if (!check_slab(s, page)) 956 if (!check_slab(s, page))
@@ -878,14 +966,14 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
878 goto bad; 966 goto bad;
879 } 967 }
880 968
881 if (!check_object(s, page, object, 0)) 969 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
882 goto bad; 970 goto bad;
883 971
884 /* Success perform special debug activities for allocs */ 972 /* Success perform special debug activities for allocs */
885 if (s->flags & SLAB_STORE_USER) 973 if (s->flags & SLAB_STORE_USER)
886 set_track(s, object, TRACK_ALLOC, addr); 974 set_track(s, object, TRACK_ALLOC, addr);
887 trace(s, page, object, 1); 975 trace(s, page, object, 1);
888 init_object(s, object, 1); 976 init_object(s, object, SLUB_RED_ACTIVE);
889 return 1; 977 return 1;
890 978
891bad: 979bad:
@@ -902,8 +990,8 @@ bad:
902 return 0; 990 return 0;
903} 991}
904 992
905static int free_debug_processing(struct kmem_cache *s, struct page *page, 993static noinline int free_debug_processing(struct kmem_cache *s,
906 void *object, unsigned long addr) 994 struct page *page, void *object, unsigned long addr)
907{ 995{
908 if (!check_slab(s, page)) 996 if (!check_slab(s, page))
909 goto fail; 997 goto fail;
@@ -918,7 +1006,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
918 goto fail; 1006 goto fail;
919 } 1007 }
920 1008
921 if (!check_object(s, page, object, 1)) 1009 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
922 return 0; 1010 return 0;
923 1011
924 if (unlikely(s != page->slab)) { 1012 if (unlikely(s != page->slab)) {
@@ -942,7 +1030,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
942 if (s->flags & SLAB_STORE_USER) 1030 if (s->flags & SLAB_STORE_USER)
943 set_track(s, object, TRACK_FREE, addr); 1031 set_track(s, object, TRACK_FREE, addr);
944 trace(s, page, object, 0); 1032 trace(s, page, object, 0);
945 init_object(s, object, 0); 1033 init_object(s, object, SLUB_RED_INACTIVE);
946 return 1; 1034 return 1;
947 1035
948fail: 1036fail:
@@ -1046,7 +1134,7 @@ static inline int free_debug_processing(struct kmem_cache *s,
1046static inline int slab_pad_check(struct kmem_cache *s, struct page *page) 1134static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1047 { return 1; } 1135 { return 1; }
1048static inline int check_object(struct kmem_cache *s, struct page *page, 1136static inline int check_object(struct kmem_cache *s, struct page *page,
1049 void *object, int active) { return 1; } 1137 void *object, u8 val) { return 1; }
1050static inline void add_full(struct kmem_cache_node *n, struct page *page) {} 1138static inline void add_full(struct kmem_cache_node *n, struct page *page) {}
1051static inline unsigned long kmem_cache_flags(unsigned long objsize, 1139static inline unsigned long kmem_cache_flags(unsigned long objsize,
1052 unsigned long flags, const char *name, 1140 unsigned long flags, const char *name,
@@ -1066,7 +1154,16 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
1066 int objects) {} 1154 int objects) {}
1067static inline void dec_slabs_node(struct kmem_cache *s, int node, 1155static inline void dec_slabs_node(struct kmem_cache *s, int node,
1068 int objects) {} 1156 int objects) {}
1069#endif 1157
1158static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1159 { return 0; }
1160
1161static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
1162 void *object) {}
1163
1164static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
1165
1166#endif /* CONFIG_SLUB_DEBUG */
1070 1167
1071/* 1168/*
1072 * Slab allocation and freeing 1169 * Slab allocation and freeing
@@ -1194,7 +1291,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1194 slab_pad_check(s, page); 1291 slab_pad_check(s, page);
1195 for_each_object(p, s, page_address(page), 1292 for_each_object(p, s, page_address(page),
1196 page->objects) 1293 page->objects)
1197 check_object(s, page, p, 0); 1294 check_object(s, page, p, SLUB_RED_INACTIVE);
1198 } 1295 }
1199 1296
1200 kmemcheck_free_shadow(page, compound_order(page)); 1297 kmemcheck_free_shadow(page, compound_order(page));
@@ -1211,21 +1308,38 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1211 __free_pages(page, order); 1308 __free_pages(page, order);
1212} 1309}
1213 1310
1311#define need_reserve_slab_rcu \
1312 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1313
1214static void rcu_free_slab(struct rcu_head *h) 1314static void rcu_free_slab(struct rcu_head *h)
1215{ 1315{
1216 struct page *page; 1316 struct page *page;
1217 1317
1218 page = container_of((struct list_head *)h, struct page, lru); 1318 if (need_reserve_slab_rcu)
1319 page = virt_to_head_page(h);
1320 else
1321 page = container_of((struct list_head *)h, struct page, lru);
1322
1219 __free_slab(page->slab, page); 1323 __free_slab(page->slab, page);
1220} 1324}
1221 1325
1222static void free_slab(struct kmem_cache *s, struct page *page) 1326static void free_slab(struct kmem_cache *s, struct page *page)
1223{ 1327{
1224 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { 1328 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1225 /* 1329 struct rcu_head *head;
1226 * RCU free overloads the RCU head over the LRU 1330
1227 */ 1331 if (need_reserve_slab_rcu) {
1228 struct rcu_head *head = (void *)&page->lru; 1332 int order = compound_order(page);
1333 int offset = (PAGE_SIZE << order) - s->reserved;
1334
1335 VM_BUG_ON(s->reserved != sizeof(*head));
1336 head = page_address(page) + offset;
1337 } else {
1338 /*
1339 * RCU free overloads the RCU head over the LRU
1340 */
1341 head = (void *)&page->lru;
1342 }
1229 1343
1230 call_rcu(head, rcu_free_slab); 1344 call_rcu(head, rcu_free_slab);
1231 } else 1345 } else
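The need_reserve_slab_rcu path above covers configurations where struct rcu_head no longer fits inside page->lru: such caches reserve sizeof(struct rcu_head) bytes at the end of every slab, and free_slab() places the RCU head at that reserved tail. A sketch of the offset computation, assuming a 4 KiB page and an illustrative rcu_head layout:

#include <stdio.h>

#define TOY_PAGE_SIZE	4096UL	/* assumption for the example */

struct toy_rcu_head {
	void *next;
	void (*func)(struct toy_rcu_head *);
};

int main(void)
{
	int order = 1;					/* 2-page slab */
	unsigned long reserved = sizeof(struct toy_rcu_head);
	unsigned long offset = (TOY_PAGE_SIZE << order) - reserved;

	/* free_slab() puts the rcu_head at page_address(page) + offset,
	 * i.e. in the tail that order_objects() kept free of objects. */
	printf("slab bytes=%lu, rcu_head at offset %lu (%lu bytes reserved)\n",
	       TOY_PAGE_SIZE << order, offset, reserved);
	return 0;
}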
@@ -1274,13 +1388,19 @@ static void add_partial(struct kmem_cache_node *n,
1274 spin_unlock(&n->list_lock); 1388 spin_unlock(&n->list_lock);
1275} 1389}
1276 1390
1391static inline void __remove_partial(struct kmem_cache_node *n,
1392 struct page *page)
1393{
1394 list_del(&page->lru);
1395 n->nr_partial--;
1396}
1397
1277static void remove_partial(struct kmem_cache *s, struct page *page) 1398static void remove_partial(struct kmem_cache *s, struct page *page)
1278{ 1399{
1279 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1400 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1280 1401
1281 spin_lock(&n->list_lock); 1402 spin_lock(&n->list_lock);
1282 list_del(&page->lru); 1403 __remove_partial(n, page);
1283 n->nr_partial--;
1284 spin_unlock(&n->list_lock); 1404 spin_unlock(&n->list_lock);
1285} 1405}
1286 1406
@@ -1293,8 +1413,7 @@ static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
1293 struct page *page) 1413 struct page *page)
1294{ 1414{
1295 if (slab_trylock(page)) { 1415 if (slab_trylock(page)) {
1296 list_del(&page->lru); 1416 __remove_partial(n, page);
1297 n->nr_partial--;
1298 __SetPageSlubFrozen(page); 1417 __SetPageSlubFrozen(page);
1299 return 1; 1418 return 1;
1300 } 1419 }
@@ -1391,7 +1510,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
1391 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; 1510 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1392 1511
1393 page = get_partial_node(get_node(s, searchnode)); 1512 page = get_partial_node(get_node(s, searchnode));
1394 if (page || node != -1) 1513 if (page || node != NUMA_NO_NODE)
1395 return page; 1514 return page;
1396 1515
1397 return get_any_partial(s, flags); 1516 return get_any_partial(s, flags);
@@ -1405,6 +1524,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
1405 * On exit the slab lock will have been dropped. 1524 * On exit the slab lock will have been dropped.
1406 */ 1525 */
1407static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) 1526static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1527 __releases(bitlock)
1408{ 1528{
1409 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1529 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1410 1530
@@ -1443,10 +1563,77 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1443 } 1563 }
1444} 1564}
1445 1565
1566#ifdef CONFIG_PREEMPT
1567/*
1568 * Calculate the next globally unique transaction for disambiguiation
1569 * during cmpxchg. The transactions start with the cpu number and are then
1570 * incremented by CONFIG_NR_CPUS.
1571 */
1572#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
1573#else
1574/*
1575 * No preemption supported therefore also no need to check for
1576 * different cpus.
1577 */
1578#define TID_STEP 1
1579#endif
1580
1581static inline unsigned long next_tid(unsigned long tid)
1582{
1583 return tid + TID_STEP;
1584}
1585
1586static inline unsigned int tid_to_cpu(unsigned long tid)
1587{
1588 return tid % TID_STEP;
1589}
1590
1591static inline unsigned long tid_to_event(unsigned long tid)
1592{
1593 return tid / TID_STEP;
1594}
1595
1596static inline unsigned int init_tid(int cpu)
1597{
1598 return cpu;
1599}
1600
1601static inline void note_cmpxchg_failure(const char *n,
1602 const struct kmem_cache *s, unsigned long tid)
1603{
1604#ifdef SLUB_DEBUG_CMPXCHG
1605 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
1606
1607 printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
1608
1609#ifdef CONFIG_PREEMPT
1610 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
1611 printk("due to cpu change %d -> %d\n",
1612 tid_to_cpu(tid), tid_to_cpu(actual_tid));
1613 else
1614#endif
1615 if (tid_to_event(tid) != tid_to_event(actual_tid))
1616 printk("due to cpu running other code. Event %ld->%ld\n",
1617 tid_to_event(tid), tid_to_event(actual_tid));
1618 else
1619 printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
1620 actual_tid, tid, next_tid(tid));
1621#endif
1622 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
1623}
1624
1625void init_kmem_cache_cpus(struct kmem_cache *s)
1626{
1627 int cpu;
1628
1629 for_each_possible_cpu(cpu)
1630 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1631}
1446/* 1632/*
1447 * Remove the cpu slab 1633 * Remove the cpu slab
1448 */ 1634 */
1449static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1635static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1636 __releases(bitlock)
1450{ 1637{
1451 struct page *page = c->page; 1638 struct page *page = c->page;
1452 int tail = 1; 1639 int tail = 1;
@@ -1473,6 +1660,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1473 page->inuse--; 1660 page->inuse--;
1474 } 1661 }
1475 c->page = NULL; 1662 c->page = NULL;
1663 c->tid = next_tid(c->tid);
1476 unfreeze_slab(s, page, tail); 1664 unfreeze_slab(s, page, tail);
1477} 1665}
1478 1666
@@ -1606,33 +1794,46 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
1606 unsigned long addr, struct kmem_cache_cpu *c) 1794 unsigned long addr, struct kmem_cache_cpu *c)
1607{ 1795{
1608 void **object; 1796 void **object;
1609 struct page *new; 1797 struct page *page;
1798 unsigned long flags;
1799
1800 local_irq_save(flags);
1801#ifdef CONFIG_PREEMPT
1802 /*
1803 * We may have been preempted and rescheduled on a different
1804 * cpu before disabling interrupts. Need to reload cpu area
1805 * pointer.
1806 */
1807 c = this_cpu_ptr(s->cpu_slab);
1808#endif
1610 1809
1611 /* We handle __GFP_ZERO in the caller */ 1810 /* We handle __GFP_ZERO in the caller */
1612 gfpflags &= ~__GFP_ZERO; 1811 gfpflags &= ~__GFP_ZERO;
1613 1812
1614 if (!c->page) 1813 page = c->page;
1814 if (!page)
1615 goto new_slab; 1815 goto new_slab;
1616 1816
1617 slab_lock(c->page); 1817 slab_lock(page);
1618 if (unlikely(!node_match(c, node))) 1818 if (unlikely(!node_match(c, node)))
1619 goto another_slab; 1819 goto another_slab;
1620 1820
1621 stat(s, ALLOC_REFILL); 1821 stat(s, ALLOC_REFILL);
1622 1822
1623load_freelist: 1823load_freelist:
1624 object = c->page->freelist; 1824 object = page->freelist;
1625 if (unlikely(!object)) 1825 if (unlikely(!object))
1626 goto another_slab; 1826 goto another_slab;
1627 if (kmem_cache_debug(s)) 1827 if (kmem_cache_debug(s))
1628 goto debug; 1828 goto debug;
1629 1829
1630 c->freelist = get_freepointer(s, object); 1830 c->freelist = get_freepointer(s, object);
1631 c->page->inuse = c->page->objects; 1831 page->inuse = page->objects;
1632 c->page->freelist = NULL; 1832 page->freelist = NULL;
1633 c->node = page_to_nid(c->page); 1833
1634unlock_out: 1834 slab_unlock(page);
1635 slab_unlock(c->page); 1835 c->tid = next_tid(c->tid);
1836 local_irq_restore(flags);
1636 stat(s, ALLOC_SLOWPATH); 1837 stat(s, ALLOC_SLOWPATH);
1637 return object; 1838 return object;
1638 1839
@@ -1640,42 +1841,50 @@ another_slab:
1640 deactivate_slab(s, c); 1841 deactivate_slab(s, c);
1641 1842
1642new_slab: 1843new_slab:
1643 new = get_partial(s, gfpflags, node); 1844 page = get_partial(s, gfpflags, node);
1644 if (new) { 1845 if (page) {
1645 c->page = new;
1646 stat(s, ALLOC_FROM_PARTIAL); 1846 stat(s, ALLOC_FROM_PARTIAL);
1847 c->node = page_to_nid(page);
1848 c->page = page;
1647 goto load_freelist; 1849 goto load_freelist;
1648 } 1850 }
1649 1851
1852 gfpflags &= gfp_allowed_mask;
1650 if (gfpflags & __GFP_WAIT) 1853 if (gfpflags & __GFP_WAIT)
1651 local_irq_enable(); 1854 local_irq_enable();
1652 1855
1653 new = new_slab(s, gfpflags, node); 1856 page = new_slab(s, gfpflags, node);
1654 1857
1655 if (gfpflags & __GFP_WAIT) 1858 if (gfpflags & __GFP_WAIT)
1656 local_irq_disable(); 1859 local_irq_disable();
1657 1860
1658 if (new) { 1861 if (page) {
1659 c = __this_cpu_ptr(s->cpu_slab); 1862 c = __this_cpu_ptr(s->cpu_slab);
1660 stat(s, ALLOC_SLAB); 1863 stat(s, ALLOC_SLAB);
1661 if (c->page) 1864 if (c->page)
1662 flush_slab(s, c); 1865 flush_slab(s, c);
1663 slab_lock(new); 1866
1664 __SetPageSlubFrozen(new); 1867 slab_lock(page);
1665 c->page = new; 1868 __SetPageSlubFrozen(page);
1869 c->node = page_to_nid(page);
1870 c->page = page;
1666 goto load_freelist; 1871 goto load_freelist;
1667 } 1872 }
1668 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) 1873 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
1669 slab_out_of_memory(s, gfpflags, node); 1874 slab_out_of_memory(s, gfpflags, node);
1875 local_irq_restore(flags);
1670 return NULL; 1876 return NULL;
1671debug: 1877debug:
1672 if (!alloc_debug_processing(s, c->page, object, addr)) 1878 if (!alloc_debug_processing(s, page, object, addr))
1673 goto another_slab; 1879 goto another_slab;
1674 1880
1675 c->page->inuse++; 1881 page->inuse++;
1676 c->page->freelist = get_freepointer(s, object); 1882 page->freelist = get_freepointer(s, object);
1677 c->node = -1; 1883 deactivate_slab(s, c);
1678 goto unlock_out; 1884 c->page = NULL;
1885 c->node = NUMA_NO_NODE;
1886 local_irq_restore(flags);
1887 return object;
1679} 1888}
1680 1889
1681/* 1890/*
@@ -1693,34 +1902,63 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1693{ 1902{
1694 void **object; 1903 void **object;
1695 struct kmem_cache_cpu *c; 1904 struct kmem_cache_cpu *c;
1696 unsigned long flags; 1905 unsigned long tid;
1697
1698 gfpflags &= gfp_allowed_mask;
1699 1906
1700 lockdep_trace_alloc(gfpflags); 1907 if (slab_pre_alloc_hook(s, gfpflags))
1701 might_sleep_if(gfpflags & __GFP_WAIT);
1702
1703 if (should_failslab(s->objsize, gfpflags, s->flags))
1704 return NULL; 1908 return NULL;
1705 1909
1706 local_irq_save(flags); 1910redo:
1911
1912 /*
1913 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
1914 * enabled. We may switch back and forth between cpus while
1915 * reading from one cpu area. That does not matter as long
1916 * as we end up on the original cpu again when doing the cmpxchg.
1917 */
1707 c = __this_cpu_ptr(s->cpu_slab); 1918 c = __this_cpu_ptr(s->cpu_slab);
1919
1920 /*
1921 * The transaction ids are globally unique per cpu and per operation on
1922 * a per cpu queue. Thus they can be guarantee that the cmpxchg_double
1923 * occurs on the right processor and that there was no operation on the
1924 * linked list in between.
1925 */
1926 tid = c->tid;
1927 barrier();
1928
1708 object = c->freelist; 1929 object = c->freelist;
1709 if (unlikely(!object || !node_match(c, node))) 1930 if (unlikely(!object || !node_match(c, node)))
1710 1931
1711 object = __slab_alloc(s, gfpflags, node, addr, c); 1932 object = __slab_alloc(s, gfpflags, node, addr, c);
1712 1933
1713 else { 1934 else {
1714 c->freelist = get_freepointer(s, object); 1935 /*
1936 * The cmpxchg will only match if there was no additional
1937 * operation and if we are on the right processor.
1938 *
1939 * The cmpxchg does the following atomically (without lock semantics!)
1940 * 1. Relocate first pointer to the current per cpu area.
1941 * 2. Verify that tid and freelist have not been changed
1942 * 3. If they were not changed replace tid and freelist
1943 *
1944 * Since this is without lock semantics the protection is only against
1945 * code executing on this cpu *not* from access by other cpus.
1946 */
1947 if (unlikely(!irqsafe_cpu_cmpxchg_double(
1948 s->cpu_slab->freelist, s->cpu_slab->tid,
1949 object, tid,
1950 get_freepointer_safe(s, object), next_tid(tid)))) {
1951
1952 note_cmpxchg_failure("slab_alloc", s, tid);
1953 goto redo;
1954 }
1715 stat(s, ALLOC_FASTPATH); 1955 stat(s, ALLOC_FASTPATH);
1716 } 1956 }
1717 local_irq_restore(flags);
1718 1957
1719 if (unlikely(gfpflags & __GFP_ZERO) && object) 1958 if (unlikely(gfpflags & __GFP_ZERO) && object)
1720 memset(object, 0, s->objsize); 1959 memset(object, 0, s->objsize);
1721 1960
1722 kmemcheck_slab_alloc(s, gfpflags, object, s->objsize); 1961 slab_post_alloc_hook(s, gfpflags, object);
1723 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags);
1724 1962
1725 return object; 1963 return object;
1726} 1964}
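The rewritten slab_alloc() fastpath above is optimistic: it snapshots the per-cpu tid and freelist head with preemption enabled, then issues a double-word cmpxchg that installs (next object, next tid) only if neither word changed, and retries from the top on failure. The sketch below models that retry loop single-threaded, replacing irqsafe_cpu_cmpxchg_double() with an explicit compare of both words, so it illustrates the control flow rather than the atomicity guarantees:

#include <stdio.h>
#include <stdbool.h>

struct toy_cpu_slab {
	void **freelist;	/* first free object; it stores the next pointer */
	unsigned long tid;	/* transaction id, bumped on every change */
};

/* Model of irqsafe_cpu_cmpxchg_double(): succeed only if both the freelist
 * head and the tid still hold the values read earlier. */
static bool cmpxchg_double_model(struct toy_cpu_slab *c,
				 void *old_freelist, unsigned long old_tid,
				 void *new_freelist, unsigned long new_tid)
{
	if (c->freelist != old_freelist || c->tid != old_tid)
		return false;		/* something else ran on this cpu: retry */
	c->freelist = new_freelist;
	c->tid = new_tid;
	return true;
}

static void *toy_slab_alloc(struct toy_cpu_slab *c)
{
	void *object;
	unsigned long tid;

	do {
		tid = c->tid;			/* snapshot tid ... */
		object = c->freelist;		/* ... and freelist head */
		if (!object)
			return NULL;		/* would fall back to __slab_alloc() */
		/* next_tid() in the kernel advances by TID_STEP; +1 suffices here */
	} while (!cmpxchg_double_model(c, object, tid,
				       *(void **)object, tid + 1));
	return object;
}

int main(void)
{
	void *objs[3] = { &objs[1], &objs[2], NULL };	/* chain 0 -> 1 -> 2 -> NULL */
	struct toy_cpu_slab c = { .freelist = objs, .tid = 0 };

	while (toy_slab_alloc(&c))
		printf("allocated one object, tid now %lu\n", c.tid);
	return 0;
}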
@@ -1736,11 +1974,21 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
1736EXPORT_SYMBOL(kmem_cache_alloc); 1974EXPORT_SYMBOL(kmem_cache_alloc);
1737 1975
1738#ifdef CONFIG_TRACING 1976#ifdef CONFIG_TRACING
1739void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) 1977void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
1740{ 1978{
1741 return slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); 1979 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
1980 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
1981 return ret;
1982}
1983EXPORT_SYMBOL(kmem_cache_alloc_trace);
1984
1985void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
1986{
1987 void *ret = kmalloc_order(size, flags, order);
1988 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
1989 return ret;
1742} 1990}
1743EXPORT_SYMBOL(kmem_cache_alloc_notrace); 1991EXPORT_SYMBOL(kmalloc_order_trace);
1744#endif 1992#endif
1745 1993
1746#ifdef CONFIG_NUMA 1994#ifdef CONFIG_NUMA
@@ -1754,16 +2002,20 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
1754 return ret; 2002 return ret;
1755} 2003}
1756EXPORT_SYMBOL(kmem_cache_alloc_node); 2004EXPORT_SYMBOL(kmem_cache_alloc_node);
1757#endif
1758 2005
1759#ifdef CONFIG_TRACING 2006#ifdef CONFIG_TRACING
1760void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, 2007void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
1761 gfp_t gfpflags, 2008 gfp_t gfpflags,
1762 int node) 2009 int node, size_t size)
1763{ 2010{
1764 return slab_alloc(s, gfpflags, node, _RET_IP_); 2011 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2012
2013 trace_kmalloc_node(_RET_IP_, ret,
2014 size, s->size, gfpflags, node);
2015 return ret;
1765} 2016}
1766EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); 2017EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2018#endif
1767#endif 2019#endif
1768 2020
1769/* 2021/*
@@ -1779,14 +2031,15 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
1779{ 2031{
1780 void *prior; 2032 void *prior;
1781 void **object = (void *)x; 2033 void **object = (void *)x;
2034 unsigned long flags;
1782 2035
1783 stat(s, FREE_SLOWPATH); 2036 local_irq_save(flags);
1784 slab_lock(page); 2037 slab_lock(page);
2038 stat(s, FREE_SLOWPATH);
1785 2039
1786 if (kmem_cache_debug(s)) 2040 if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
1787 goto debug; 2041 goto out_unlock;
1788 2042
1789checks_ok:
1790 prior = page->freelist; 2043 prior = page->freelist;
1791 set_freepointer(s, object, prior); 2044 set_freepointer(s, object, prior);
1792 page->freelist = object; 2045 page->freelist = object;
@@ -1811,6 +2064,7 @@ checks_ok:
1811 2064
1812out_unlock: 2065out_unlock:
1813 slab_unlock(page); 2066 slab_unlock(page);
2067 local_irq_restore(flags);
1814 return; 2068 return;
1815 2069
1816slab_empty: 2070slab_empty:
@@ -1822,14 +2076,9 @@ slab_empty:
1822 stat(s, FREE_REMOVE_PARTIAL); 2076 stat(s, FREE_REMOVE_PARTIAL);
1823 } 2077 }
1824 slab_unlock(page); 2078 slab_unlock(page);
2079 local_irq_restore(flags);
1825 stat(s, FREE_SLAB); 2080 stat(s, FREE_SLAB);
1826 discard_slab(s, page); 2081 discard_slab(s, page);
1827 return;
1828
1829debug:
1830 if (!free_debug_processing(s, page, x, addr))
1831 goto out_unlock;
1832 goto checks_ok;
1833} 2082}
1834 2083
1835/* 2084/*
@@ -1848,23 +2097,38 @@ static __always_inline void slab_free(struct kmem_cache *s,
1848{ 2097{
1849 void **object = (void *)x; 2098 void **object = (void *)x;
1850 struct kmem_cache_cpu *c; 2099 struct kmem_cache_cpu *c;
1851 unsigned long flags; 2100 unsigned long tid;
1852 2101
1853 kmemleak_free_recursive(x, s->flags); 2102 slab_free_hook(s, x);
1854 local_irq_save(flags); 2103
2104redo:
2105
2106 /*
2107 * Determine the currently cpus per cpu slab.
2108 * The cpu may change afterward. However that does not matter since
2109 * data is retrieved via this pointer. If we are on the same cpu
2110 * during the cmpxchg then the free will succedd.
2111 */
1855 c = __this_cpu_ptr(s->cpu_slab); 2112 c = __this_cpu_ptr(s->cpu_slab);
1856 kmemcheck_slab_free(s, object, s->objsize); 2113
1857 debug_check_no_locks_freed(object, s->objsize); 2114 tid = c->tid;
1858 if (!(s->flags & SLAB_DEBUG_OBJECTS)) 2115 barrier();
1859 debug_check_no_obj_freed(object, s->objsize); 2116
1860 if (likely(page == c->page && c->node >= 0)) { 2117 if (likely(page == c->page)) {
1861 set_freepointer(s, object, c->freelist); 2118 set_freepointer(s, object, c->freelist);
1862 c->freelist = object; 2119
2120 if (unlikely(!irqsafe_cpu_cmpxchg_double(
2121 s->cpu_slab->freelist, s->cpu_slab->tid,
2122 c->freelist, tid,
2123 object, next_tid(tid)))) {
2124
2125 note_cmpxchg_failure("slab_free", s, tid);
2126 goto redo;
2127 }
1863 stat(s, FREE_FASTPATH); 2128 stat(s, FREE_FASTPATH);
1864 } else 2129 } else
1865 __slab_free(s, page, x, addr); 2130 __slab_free(s, page, x, addr);
1866 2131
1867 local_irq_restore(flags);
1868} 2132}
1869 2133
1870void kmem_cache_free(struct kmem_cache *s, void *x) 2134void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -1879,17 +2143,6 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
1879} 2143}
1880EXPORT_SYMBOL(kmem_cache_free); 2144EXPORT_SYMBOL(kmem_cache_free);
1881 2145
1882/* Figure out on which slab page the object resides */
1883static struct page *get_object_page(const void *x)
1884{
1885 struct page *page = virt_to_head_page(x);
1886
1887 if (!PageSlab(page))
1888 return NULL;
1889
1890 return page;
1891}
1892
1893/* 2146/*
1894 * Object placement in a slab is made very easy because we always start at 2147 * Object placement in a slab is made very easy because we always start at
1895 * offset 0. If we tune the size of the object to the alignment then we can 2148 * offset 0. If we tune the size of the object to the alignment then we can
@@ -1945,13 +2198,13 @@ static int slub_nomerge;
1945 * the smallest order which will fit the object. 2198 * the smallest order which will fit the object.
1946 */ 2199 */
1947static inline int slab_order(int size, int min_objects, 2200static inline int slab_order(int size, int min_objects,
1948 int max_order, int fract_leftover) 2201 int max_order, int fract_leftover, int reserved)
1949{ 2202{
1950 int order; 2203 int order;
1951 int rem; 2204 int rem;
1952 int min_order = slub_min_order; 2205 int min_order = slub_min_order;
1953 2206
1954 if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE) 2207 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
1955 return get_order(size * MAX_OBJS_PER_PAGE) - 1; 2208 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
1956 2209
1957 for (order = max(min_order, 2210 for (order = max(min_order,
@@ -1960,10 +2213,10 @@ static inline int slab_order(int size, int min_objects,
1960 2213
1961 unsigned long slab_size = PAGE_SIZE << order; 2214 unsigned long slab_size = PAGE_SIZE << order;
1962 2215
1963 if (slab_size < min_objects * size) 2216 if (slab_size < min_objects * size + reserved)
1964 continue; 2217 continue;
1965 2218
1966 rem = slab_size % size; 2219 rem = (slab_size - reserved) % size;
1967 2220
1968 if (rem <= slab_size / fract_leftover) 2221 if (rem <= slab_size / fract_leftover)
1969 break; 2222 break;
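slab_order()/calculate_order() now thread the reserved byte count through the order search: an order qualifies once min_objects plus the reserved tail fit, and it is accepted when the leftover (slab_size - reserved) % size is no more than 1/fract_leftover of the slab. A condensed model of that loop, with example page size and order limits rather than the kernel's tunables:

#include <stdio.h>

#define TOY_PAGE_SIZE	4096UL	/* example values, not the kernel's */
#define TOY_MIN_ORDER	0
#define TOY_MAX_ORDER	3

/* Pick the smallest order whose wasted tail is at most 1/fract_leftover
 * of the slab, mirroring the loop in slab_order(). */
static int toy_slab_order(unsigned long size, int min_objects,
			  int fract_leftover, int reserved)
{
	for (int order = TOY_MIN_ORDER; order <= TOY_MAX_ORDER; order++) {
		unsigned long slab_size = TOY_PAGE_SIZE << order;
		unsigned long rem;

		if (slab_size < min_objects * size + reserved)
			continue;			/* not enough room yet */

		rem = (slab_size - reserved) % size;	/* unusable leftover bytes */
		if (rem <= slab_size / fract_leftover)
			return order;
	}
	return TOY_MAX_ORDER;
}

int main(void)
{
	/* 700-byte objects, at least 8 per slab, tolerate 1/16 waste. */
	printf("chosen order: %d\n", toy_slab_order(700, 8, 16, 0));
	return 0;
}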
@@ -1973,7 +2226,7 @@ static inline int slab_order(int size, int min_objects,
1973 return order; 2226 return order;
1974} 2227}
1975 2228
1976static inline int calculate_order(int size) 2229static inline int calculate_order(int size, int reserved)
1977{ 2230{
1978 int order; 2231 int order;
1979 int min_objects; 2232 int min_objects;
@@ -1991,14 +2244,14 @@ static inline int calculate_order(int size)
1991 min_objects = slub_min_objects; 2244 min_objects = slub_min_objects;
1992 if (!min_objects) 2245 if (!min_objects)
1993 min_objects = 4 * (fls(nr_cpu_ids) + 1); 2246 min_objects = 4 * (fls(nr_cpu_ids) + 1);
1994 max_objects = (PAGE_SIZE << slub_max_order)/size; 2247 max_objects = order_objects(slub_max_order, size, reserved);
1995 min_objects = min(min_objects, max_objects); 2248 min_objects = min(min_objects, max_objects);
1996 2249
1997 while (min_objects > 1) { 2250 while (min_objects > 1) {
1998 fraction = 16; 2251 fraction = 16;
1999 while (fraction >= 4) { 2252 while (fraction >= 4) {
2000 order = slab_order(size, min_objects, 2253 order = slab_order(size, min_objects,
2001 slub_max_order, fraction); 2254 slub_max_order, fraction, reserved);
2002 if (order <= slub_max_order) 2255 if (order <= slub_max_order)
2003 return order; 2256 return order;
2004 fraction /= 2; 2257 fraction /= 2;
@@ -2010,14 +2263,14 @@ static inline int calculate_order(int size)
2010 * We were unable to place multiple objects in a slab. Now 2263 * We were unable to place multiple objects in a slab. Now
2011 * lets see if we can place a single object there. 2264 * lets see if we can place a single object there.
2012 */ 2265 */
2013 order = slab_order(size, 1, slub_max_order, 1); 2266 order = slab_order(size, 1, slub_max_order, 1, reserved);
2014 if (order <= slub_max_order) 2267 if (order <= slub_max_order)
2015 return order; 2268 return order;
2016 2269
2017 /* 2270 /*
2018 * Doh this slab cannot be placed using slub_max_order. 2271 * Doh this slab cannot be placed using slub_max_order.
2019 */ 2272 */
2020 order = slab_order(size, 1, MAX_ORDER, 1); 2273 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2021 if (order < MAX_ORDER) 2274 if (order < MAX_ORDER)
2022 return order; 2275 return order;
2023 return -ENOSYS; 2276 return -ENOSYS;
@@ -2062,26 +2315,28 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
2062#endif 2315#endif
2063} 2316}
2064 2317
2065static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]); 2318static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2066
2067static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
2068{ 2319{
2069 if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) 2320 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2070 /* 2321 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
2071 * Boot time creation of the kmalloc array. Use static per cpu data 2322
2072 * since the per cpu allocator is not available yet. 2323 /*
2073 */ 2324 * Must align to double word boundary for the double cmpxchg
2074 s->cpu_slab = kmalloc_percpu + (s - kmalloc_caches); 2325 * instructions to work; see __pcpu_double_call_return_bool().
2075 else 2326 */
2076 s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); 2327 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2328 2 * sizeof(void *));
2077 2329
2078 if (!s->cpu_slab) 2330 if (!s->cpu_slab)
2079 return 0; 2331 return 0;
2080 2332
2333 init_kmem_cache_cpus(s);
2334
2081 return 1; 2335 return 1;
2082} 2336}
2083 2337
2084#ifdef CONFIG_NUMA 2338static struct kmem_cache *kmem_cache_node;
2339
2085/* 2340/*
2086 * No kmalloc_node yet so do it by hand. We know that this is the first 2341 * No kmalloc_node yet so do it by hand. We know that this is the first
2087 * slab on the node for this slabcache. There are no concurrent accesses 2342 * slab on the node for this slabcache. There are no concurrent accesses
@@ -2091,15 +2346,15 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
2091 * when allocating for the kmalloc_node_cache. This is used for bootstrapping 2346 * when allocating for the kmalloc_node_cache. This is used for bootstrapping
2092 * memory on a fresh node that has no slab structures yet. 2347 * memory on a fresh node that has no slab structures yet.
2093 */ 2348 */
2094static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node) 2349static void early_kmem_cache_node_alloc(int node)
2095{ 2350{
2096 struct page *page; 2351 struct page *page;
2097 struct kmem_cache_node *n; 2352 struct kmem_cache_node *n;
2098 unsigned long flags; 2353 unsigned long flags;
2099 2354
2100 BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node)); 2355 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
2101 2356
2102 page = new_slab(kmalloc_caches, gfpflags, node); 2357 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
2103 2358
2104 BUG_ON(!page); 2359 BUG_ON(!page);
2105 if (page_to_nid(page) != node) { 2360 if (page_to_nid(page) != node) {
@@ -2111,15 +2366,15 @@ static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node)
2111 2366
2112 n = page->freelist; 2367 n = page->freelist;
2113 BUG_ON(!n); 2368 BUG_ON(!n);
2114 page->freelist = get_freepointer(kmalloc_caches, n); 2369 page->freelist = get_freepointer(kmem_cache_node, n);
2115 page->inuse++; 2370 page->inuse++;
2116 kmalloc_caches->node[node] = n; 2371 kmem_cache_node->node[node] = n;
2117#ifdef CONFIG_SLUB_DEBUG 2372#ifdef CONFIG_SLUB_DEBUG
2118 init_object(kmalloc_caches, n, 1); 2373 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
2119 init_tracking(kmalloc_caches, n); 2374 init_tracking(kmem_cache_node, n);
2120#endif 2375#endif
2121 init_kmem_cache_node(n, kmalloc_caches); 2376 init_kmem_cache_node(n, kmem_cache_node);
2122 inc_slabs_node(kmalloc_caches, node, page->objects); 2377 inc_slabs_node(kmem_cache_node, node, page->objects);
2123 2378
2124 /* 2379 /*
2125 * lockdep requires consistent irq usage for each lock 2380 * lockdep requires consistent irq usage for each lock
@@ -2137,13 +2392,15 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
2137 2392
2138 for_each_node_state(node, N_NORMAL_MEMORY) { 2393 for_each_node_state(node, N_NORMAL_MEMORY) {
2139 struct kmem_cache_node *n = s->node[node]; 2394 struct kmem_cache_node *n = s->node[node];
2395
2140 if (n) 2396 if (n)
2141 kmem_cache_free(kmalloc_caches, n); 2397 kmem_cache_free(kmem_cache_node, n);
2398
2142 s->node[node] = NULL; 2399 s->node[node] = NULL;
2143 } 2400 }
2144} 2401}
2145 2402
2146static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) 2403static int init_kmem_cache_nodes(struct kmem_cache *s)
2147{ 2404{
2148 int node; 2405 int node;
2149 2406
@@ -2151,11 +2408,11 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
2151 struct kmem_cache_node *n; 2408 struct kmem_cache_node *n;
2152 2409
2153 if (slab_state == DOWN) { 2410 if (slab_state == DOWN) {
2154 early_kmem_cache_node_alloc(gfpflags, node); 2411 early_kmem_cache_node_alloc(node);
2155 continue; 2412 continue;
2156 } 2413 }
2157 n = kmem_cache_alloc_node(kmalloc_caches, 2414 n = kmem_cache_alloc_node(kmem_cache_node,
2158 gfpflags, node); 2415 GFP_KERNEL, node);
2159 2416
2160 if (!n) { 2417 if (!n) {
2161 free_kmem_cache_nodes(s); 2418 free_kmem_cache_nodes(s);
@@ -2167,17 +2424,6 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
2167 } 2424 }
2168 return 1; 2425 return 1;
2169} 2426}
2170#else
2171static void free_kmem_cache_nodes(struct kmem_cache *s)
2172{
2173}
2174
2175static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
2176{
2177 init_kmem_cache_node(&s->local_node, s);
2178 return 1;
2179}
2180#endif
2181 2427
2182static void set_min_partial(struct kmem_cache *s, unsigned long min) 2428static void set_min_partial(struct kmem_cache *s, unsigned long min)
2183{ 2429{
@@ -2285,7 +2531,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
2285 if (forced_order >= 0) 2531 if (forced_order >= 0)
2286 order = forced_order; 2532 order = forced_order;
2287 else 2533 else
2288 order = calculate_order(size); 2534 order = calculate_order(size, s->reserved);
2289 2535
2290 if (order < 0) 2536 if (order < 0)
2291 return 0; 2537 return 0;
@@ -2303,8 +2549,8 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
2303 /* 2549 /*
2304 * Determine the number of objects per slab 2550 * Determine the number of objects per slab
2305 */ 2551 */
2306 s->oo = oo_make(order, size); 2552 s->oo = oo_make(order, size, s->reserved);
2307 s->min = oo_make(get_order(size), size); 2553 s->min = oo_make(get_order(size), size, s->reserved);
2308 if (oo_objects(s->oo) > oo_objects(s->max)) 2554 if (oo_objects(s->oo) > oo_objects(s->max))
2309 s->max = s->oo; 2555 s->max = s->oo;
2310 2556
@@ -2312,7 +2558,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
2312 2558
2313} 2559}
2314 2560
2315static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, 2561static int kmem_cache_open(struct kmem_cache *s,
2316 const char *name, size_t size, 2562 const char *name, size_t size,
2317 size_t align, unsigned long flags, 2563 size_t align, unsigned long flags,
2318 void (*ctor)(void *)) 2564 void (*ctor)(void *))
@@ -2323,6 +2569,10 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
2323 s->objsize = size; 2569 s->objsize = size;
2324 s->align = align; 2570 s->align = align;
2325 s->flags = kmem_cache_flags(size, flags, name, ctor); 2571 s->flags = kmem_cache_flags(size, flags, name, ctor);
2572 s->reserved = 0;
2573
2574 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
2575 s->reserved = sizeof(struct rcu_head);
2326 2576
2327 if (!calculate_sizes(s, -1)) 2577 if (!calculate_sizes(s, -1))
2328 goto error; 2578 goto error;
@@ -2348,10 +2598,10 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
2348#ifdef CONFIG_NUMA 2598#ifdef CONFIG_NUMA
2349 s->remote_node_defrag_ratio = 1000; 2599 s->remote_node_defrag_ratio = 1000;
2350#endif 2600#endif
2351 if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) 2601 if (!init_kmem_cache_nodes(s))
2352 goto error; 2602 goto error;
2353 2603
2354 if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) 2604 if (alloc_kmem_cache_cpus(s))
2355 return 1; 2605 return 1;
2356 2606
2357 free_kmem_cache_nodes(s); 2607 free_kmem_cache_nodes(s);
@@ -2365,35 +2615,6 @@ error:
2365} 2615}
2366 2616
2367/* 2617/*
2368 * Check if a given pointer is valid
2369 */
2370int kmem_ptr_validate(struct kmem_cache *s, const void *object)
2371{
2372 struct page *page;
2373
2374 if (!kern_ptr_validate(object, s->size))
2375 return 0;
2376
2377 page = get_object_page(object);
2378
2379 if (!page || s != page->slab)
2380 /* No slab or wrong slab */
2381 return 0;
2382
2383 if (!check_valid_pointer(s, page, object))
2384 return 0;
2385
2386 /*
2387 * We could also check if the object is on the slabs freelist.
2388 * But this would be too expensive and it seems that the main
2389 * purpose of kmem_ptr_valid() is to check if the object belongs
2390 * to a certain slab.
2391 */
2392 return 1;
2393}
2394EXPORT_SYMBOL(kmem_ptr_validate);
2395
2396/*
2397 * Determine the size of a slab object 2618 * Determine the size of a slab object
2398 */ 2619 */
2399unsigned int kmem_cache_size(struct kmem_cache *s) 2620unsigned int kmem_cache_size(struct kmem_cache *s)
@@ -2402,28 +2623,20 @@ unsigned int kmem_cache_size(struct kmem_cache *s)
2402} 2623}
2403EXPORT_SYMBOL(kmem_cache_size); 2624EXPORT_SYMBOL(kmem_cache_size);
2404 2625
2405const char *kmem_cache_name(struct kmem_cache *s)
2406{
2407 return s->name;
2408}
2409EXPORT_SYMBOL(kmem_cache_name);
2410
2411static void list_slab_objects(struct kmem_cache *s, struct page *page, 2626static void list_slab_objects(struct kmem_cache *s, struct page *page,
2412 const char *text) 2627 const char *text)
2413{ 2628{
2414#ifdef CONFIG_SLUB_DEBUG 2629#ifdef CONFIG_SLUB_DEBUG
2415 void *addr = page_address(page); 2630 void *addr = page_address(page);
2416 void *p; 2631 void *p;
2417 long *map = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long), 2632 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
2418 GFP_ATOMIC); 2633 sizeof(long), GFP_ATOMIC);
2419
2420 if (!map) 2634 if (!map)
2421 return; 2635 return;
2422 slab_err(s, page, "%s", text); 2636 slab_err(s, page, "%s", text);
2423 slab_lock(page); 2637 slab_lock(page);
2424 for_each_free_object(p, s, page->freelist)
2425 set_bit(slab_index(p, s, addr), map);
2426 2638
2639 get_map(s, page, map);
2427 for_each_object(p, s, addr, page->objects) { 2640 for_each_object(p, s, addr, page->objects) {
2428 2641
2429 if (!test_bit(slab_index(p, s, addr), map)) { 2642 if (!test_bit(slab_index(p, s, addr), map)) {
@@ -2448,9 +2661,8 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
2448 spin_lock_irqsave(&n->list_lock, flags); 2661 spin_lock_irqsave(&n->list_lock, flags);
2449 list_for_each_entry_safe(page, h, &n->partial, lru) { 2662 list_for_each_entry_safe(page, h, &n->partial, lru) {
2450 if (!page->inuse) { 2663 if (!page->inuse) {
2451 list_del(&page->lru); 2664 __remove_partial(n, page);
2452 discard_slab(s, page); 2665 discard_slab(s, page);
2453 n->nr_partial--;
2454 } else { 2666 } else {
2455 list_slab_objects(s, page, 2667 list_slab_objects(s, page,
2456 "Objects remaining on kmem_cache_close()"); 2668 "Objects remaining on kmem_cache_close()");
@@ -2507,9 +2719,15 @@ EXPORT_SYMBOL(kmem_cache_destroy);
2507 * Kmalloc subsystem 2719 * Kmalloc subsystem
2508 *******************************************************************/ 2720 *******************************************************************/
2509 2721
2510struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; 2722struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
2511EXPORT_SYMBOL(kmalloc_caches); 2723EXPORT_SYMBOL(kmalloc_caches);
2512 2724
2725static struct kmem_cache *kmem_cache;
2726
2727#ifdef CONFIG_ZONE_DMA
2728static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
2729#endif
2730
2513static int __init setup_slub_min_order(char *str) 2731static int __init setup_slub_min_order(char *str)
2514{ 2732{
2515 get_option(&str, &slub_min_order); 2733 get_option(&str, &slub_min_order);
@@ -2546,116 +2764,29 @@ static int __init setup_slub_nomerge(char *str)
2546 2764
2547__setup("slub_nomerge", setup_slub_nomerge); 2765__setup("slub_nomerge", setup_slub_nomerge);
2548 2766
2549static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, 2767static struct kmem_cache *__init create_kmalloc_cache(const char *name,
2550 const char *name, int size, gfp_t gfp_flags) 2768 int size, unsigned int flags)
2551{ 2769{
2552 unsigned int flags = 0; 2770 struct kmem_cache *s;
2553 2771
2554 if (gfp_flags & SLUB_DMA) 2772 s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
2555 flags = SLAB_CACHE_DMA;
2556 2773
2557 /* 2774 /*
2558 * This function is called with IRQs disabled during early-boot on 2775 * This function is called with IRQs disabled during early-boot on
2559 * single CPU so there's no need to take slub_lock here. 2776 * single CPU so there's no need to take slub_lock here.
2560 */ 2777 */
2561 if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, 2778 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
2562 flags, NULL)) 2779 flags, NULL))
2563 goto panic; 2780 goto panic;
2564 2781
2565 list_add(&s->list, &slab_caches); 2782 list_add(&s->list, &slab_caches);
2566
2567 if (sysfs_slab_add(s))
2568 goto panic;
2569 return s; 2783 return s;
2570 2784
2571panic: 2785panic:
2572 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); 2786 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
2787 return NULL;
2573} 2788}
2574 2789
2575#ifdef CONFIG_ZONE_DMA
2576static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT];
2577
2578static void sysfs_add_func(struct work_struct *w)
2579{
2580 struct kmem_cache *s;
2581
2582 down_write(&slub_lock);
2583 list_for_each_entry(s, &slab_caches, list) {
2584 if (s->flags & __SYSFS_ADD_DEFERRED) {
2585 s->flags &= ~__SYSFS_ADD_DEFERRED;
2586 sysfs_slab_add(s);
2587 }
2588 }
2589 up_write(&slub_lock);
2590}
2591
2592static DECLARE_WORK(sysfs_add_work, sysfs_add_func);
2593
2594static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
2595{
2596 struct kmem_cache *s;
2597 char *text;
2598 size_t realsize;
2599 unsigned long slabflags;
2600 int i;
2601
2602 s = kmalloc_caches_dma[index];
2603 if (s)
2604 return s;
2605
2606 /* Dynamically create dma cache */
2607 if (flags & __GFP_WAIT)
2608 down_write(&slub_lock);
2609 else {
2610 if (!down_write_trylock(&slub_lock))
2611 goto out;
2612 }
2613
2614 if (kmalloc_caches_dma[index])
2615 goto unlock_out;
2616
2617 realsize = kmalloc_caches[index].objsize;
2618 text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
2619 (unsigned int)realsize);
2620
2621 s = NULL;
2622 for (i = 0; i < KMALLOC_CACHES; i++)
2623 if (!kmalloc_caches[i].size)
2624 break;
2625
2626 BUG_ON(i >= KMALLOC_CACHES);
2627 s = kmalloc_caches + i;
2628
2629 /*
2630 * Must defer sysfs creation to a workqueue because we don't know
2631 * what context we are called from. Before sysfs comes up, we don't
2632 * need to do anything because our sysfs initcall will start by
2633 * adding all existing slabs to sysfs.
2634 */
2635 slabflags = SLAB_CACHE_DMA|SLAB_NOTRACK;
2636 if (slab_state >= SYSFS)
2637 slabflags |= __SYSFS_ADD_DEFERRED;
2638
2639 if (!text || !kmem_cache_open(s, flags, text,
2640 realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) {
2641 s->size = 0;
2642 kfree(text);
2643 goto unlock_out;
2644 }
2645
2646 list_add(&s->list, &slab_caches);
2647 kmalloc_caches_dma[index] = s;
2648
2649 if (slab_state >= SYSFS)
2650 schedule_work(&sysfs_add_work);
2651
2652unlock_out:
2653 up_write(&slub_lock);
2654out:
2655 return kmalloc_caches_dma[index];
2656}
2657#endif
2658
2659/* 2790/*
2660 * Conversion table for small slabs sizes / 8 to the index in the 2791 * Conversion table for small slabs sizes / 8 to the index in the
2661 * kmalloc array. This is necessary for slabs < 192 since we have non power 2792 * kmalloc array. This is necessary for slabs < 192 since we have non power
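This hunk reworks create_kmalloc_cache() so the descriptor is allocated from the kmem_cache slab instead of being handed in as a pointer into a static array, and it deletes the on-demand dma_kmalloc_cache() path together with its deferred-sysfs workqueue, since DMA caches are now created up front at the end of kmem_cache_init(). A minimal user-space sketch of the simplified boot-time creation path, with calloc standing in for kmem_cache_alloc() and an invented cache_list in place of slab_caches:

    #include <stdio.h>
    #include <stdlib.h>

    struct cache {
            const char *name;
            size_t size;
            struct cache *next;          /* models list_add(&s->list, &slab_caches) */
    };

    static struct cache *cache_list;

    static struct cache *create_boot_cache(const char *name, size_t size)
    {
            struct cache *s = calloc(1, sizeof(*s));  /* kmem_cache_alloc() stand-in */

            if (!s)
                    goto panic;
            s->name = name;
            s->size = size;
            s->next = cache_list;
            cache_list = s;
            return s;

    panic:
            /* Early boot: there is no sane way to continue without kmalloc. */
            fprintf(stderr, "Creation of kmalloc slab %s size=%zu failed.\n",
                    name, size);
            exit(1);
    }

    int main(void)
    {
            create_boot_cache("kmalloc-96", 96);
            create_boot_cache("kmalloc-192", 192);
            for (struct cache *s = cache_list; s; s = s->next)
                    printf("%s: %zu bytes\n", s->name, s->size);
            return 0;
    }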
@@ -2708,10 +2839,10 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
2708 2839
2709#ifdef CONFIG_ZONE_DMA 2840#ifdef CONFIG_ZONE_DMA
2710 if (unlikely((flags & SLUB_DMA))) 2841 if (unlikely((flags & SLUB_DMA)))
2711 return dma_kmalloc_cache(index, flags); 2842 return kmalloc_dma_caches[index];
2712 2843
2713#endif 2844#endif
2714 return &kmalloc_caches[index]; 2845 return kmalloc_caches[index];
2715} 2846}
2716 2847
2717void *__kmalloc(size_t size, gfp_t flags) 2848void *__kmalloc(size_t size, gfp_t flags)
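With the DMA caches created at boot, get_slab() becomes a plain array lookup for SLUB_DMA requests as well. The index it looks up comes from a size_index table for small sizes and power-of-two rounding above 192 bytes; a simplified user-space model of that mapping (indices 1 and 2 are the odd 96- and 192-byte caches, index 3 is kmalloc-8, as in SLUB, but this is a sketch rather than the kernel's exact code):

    #include <stdio.h>

    static int kmalloc_index(size_t size)
    {
            if (size == 0)
                    return -1;               /* the kernel returns ZERO_SIZE_PTR here */
            if (size > 64 && size <= 96)
                    return 1;                /* kmalloc-96  */
            if (size > 128 && size <= 192)
                    return 2;                /* kmalloc-192 */

            int index = 3;                   /* kmalloc-8 */
            size_t object = 8;
            while (object < size) {
                    object <<= 1;
                    index++;
            }
            return index;
    }

    int main(void)
    {
            size_t sizes[] = { 8, 24, 100, 192, 200, 4096 };
            for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                    printf("size %4zu -> index %d\n",
                           sizes[i], kmalloc_index(sizes[i]));
            return 0;
    }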
@@ -2735,6 +2866,7 @@ void *__kmalloc(size_t size, gfp_t flags)
2735} 2866}
2736EXPORT_SYMBOL(__kmalloc); 2867EXPORT_SYMBOL(__kmalloc);
2737 2868
2869#ifdef CONFIG_NUMA
2738static void *kmalloc_large_node(size_t size, gfp_t flags, int node) 2870static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
2739{ 2871{
2740 struct page *page; 2872 struct page *page;
@@ -2749,7 +2881,6 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
2749 return ptr; 2881 return ptr;
2750} 2882}
2751 2883
2752#ifdef CONFIG_NUMA
2753void *__kmalloc_node(size_t size, gfp_t flags, int node) 2884void *__kmalloc_node(size_t size, gfp_t flags, int node)
2754{ 2885{
2755 struct kmem_cache *s; 2886 struct kmem_cache *s;
@@ -2782,7 +2913,6 @@ EXPORT_SYMBOL(__kmalloc_node);
2782size_t ksize(const void *object) 2913size_t ksize(const void *object)
2783{ 2914{
2784 struct page *page; 2915 struct page *page;
2785 struct kmem_cache *s;
2786 2916
2787 if (unlikely(object == ZERO_SIZE_PTR)) 2917 if (unlikely(object == ZERO_SIZE_PTR))
2788 return 0; 2918 return 0;
@@ -2793,28 +2923,8 @@ size_t ksize(const void *object)
2793 WARN_ON(!PageCompound(page)); 2923 WARN_ON(!PageCompound(page));
2794 return PAGE_SIZE << compound_order(page); 2924 return PAGE_SIZE << compound_order(page);
2795 } 2925 }
2796 s = page->slab;
2797 2926
2798#ifdef CONFIG_SLUB_DEBUG 2927 return slab_ksize(page->slab);
2799 /*
2800 * Debugging requires use of the padding between object
2801 * and whatever may come after it.
2802 */
2803 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
2804 return s->objsize;
2805
2806#endif
2807 /*
2808 * If we have the need to store the freelist pointer
2809 * back there or track user information then we can
2810 * only use the space before that information.
2811 */
2812 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
2813 return s->inuse;
2814 /*
2815 * Else we can use all the padding etc for the allocation
2816 */
2817 return s->size;
2818} 2928}
2819EXPORT_SYMBOL(ksize); 2929EXPORT_SYMBOL(ksize);
2820 2930
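The open-coded size calculation removed from ksize() above moves into a slab_ksize() helper. The decision it encapsulates can be sketched as follows; the struct fields mirror the kernel's kmem_cache (objsize = requested object size, inuse = offset of per-object metadata, size = full slot size), while the flag values and function names here are invented for the example:

    #include <stdio.h>

    #define F_RED_ZONE    0x1
    #define F_POISON      0x2
    #define F_RCU         0x4
    #define F_STORE_USER  0x8

    struct cache {
            unsigned flags;
            size_t objsize;   /* what the caller asked for            */
            size_t inuse;     /* bytes before freelist/track metadata */
            size_t size;      /* full per-object slot size            */
    };

    /* Sketch of the slab_ksize() decision that used to live in ksize(). */
    static size_t usable_size(const struct cache *s)
    {
            if (s->flags & (F_RED_ZONE | F_POISON))
                    return s->objsize;    /* debugging needs the padding intact   */
            if (s->flags & (F_RCU | F_STORE_USER))
                    return s->inuse;      /* metadata sits right after the object */
            return s->size;               /* otherwise the whole slot is usable   */
    }

    int main(void)
    {
            struct cache c = { .flags = F_STORE_USER, .objsize = 100,
                               .inuse = 104, .size = 128 };
            printf("usable: %zu bytes\n", usable_size(&c));
            return 0;
    }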
@@ -2889,8 +2999,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
2889 * may have freed the last object and be 2999 * may have freed the last object and be
2890 * waiting to release the slab. 3000 * waiting to release the slab.
2891 */ 3001 */
2892 list_del(&page->lru); 3002 __remove_partial(n, page);
2893 n->nr_partial--;
2894 slab_unlock(page); 3003 slab_unlock(page);
2895 discard_slab(s, page); 3004 discard_slab(s, page);
2896 } else { 3005 } else {
@@ -2914,7 +3023,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
2914} 3023}
2915EXPORT_SYMBOL(kmem_cache_shrink); 3024EXPORT_SYMBOL(kmem_cache_shrink);
2916 3025
2917#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) 3026#if defined(CONFIG_MEMORY_HOTPLUG)
2918static int slab_mem_going_offline_callback(void *arg) 3027static int slab_mem_going_offline_callback(void *arg)
2919{ 3028{
2920 struct kmem_cache *s; 3029 struct kmem_cache *s;
@@ -2956,7 +3065,7 @@ static void slab_mem_offline_callback(void *arg)
2956 BUG_ON(slabs_node(s, offline_node)); 3065 BUG_ON(slabs_node(s, offline_node));
2957 3066
2958 s->node[offline_node] = NULL; 3067 s->node[offline_node] = NULL;
2959 kmem_cache_free(kmalloc_caches, n); 3068 kmem_cache_free(kmem_cache_node, n);
2960 } 3069 }
2961 } 3070 }
2962 up_read(&slub_lock); 3071 up_read(&slub_lock);
@@ -2989,7 +3098,7 @@ static int slab_mem_going_online_callback(void *arg)
2989 * since memory is not yet available from the node that 3098 * since memory is not yet available from the node that
2990 * is brought up. 3099 * is brought up.
2991 */ 3100 */
2992 n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL); 3101 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
2993 if (!n) { 3102 if (!n) {
2994 ret = -ENOMEM; 3103 ret = -ENOMEM;
2995 goto out; 3104 goto out;
@@ -3035,46 +3144,92 @@ static int slab_memory_callback(struct notifier_block *self,
3035 * Basic setup of slabs 3144 * Basic setup of slabs
3036 *******************************************************************/ 3145 *******************************************************************/
3037 3146
3147/*
3148 * Used for early kmem_cache structures that were allocated using
3149 * the page allocator
3150 */
3151
3152static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
3153{
3154 int node;
3155
3156 list_add(&s->list, &slab_caches);
3157 s->refcount = -1;
3158
3159 for_each_node_state(node, N_NORMAL_MEMORY) {
3160 struct kmem_cache_node *n = get_node(s, node);
3161 struct page *p;
3162
3163 if (n) {
3164 list_for_each_entry(p, &n->partial, lru)
3165 p->slab = s;
3166
3167#ifdef CONFIG_SLUB_DEBUG
3168 list_for_each_entry(p, &n->full, lru)
3169 p->slab = s;
3170#endif
3171 }
3172 }
3173}
3174
3038void __init kmem_cache_init(void) 3175void __init kmem_cache_init(void)
3039{ 3176{
3040 int i; 3177 int i;
3041 int caches = 0; 3178 int caches = 0;
3179 struct kmem_cache *temp_kmem_cache;
3180 int order;
3181 struct kmem_cache *temp_kmem_cache_node;
3182 unsigned long kmalloc_size;
3183
3184 kmem_size = offsetof(struct kmem_cache, node) +
3185 nr_node_ids * sizeof(struct kmem_cache_node *);
3186
3187 /* Allocate two kmem_caches from the page allocator */
3188 kmalloc_size = ALIGN(kmem_size, cache_line_size());
3189 order = get_order(2 * kmalloc_size);
3190 kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);
3042 3191
3043#ifdef CONFIG_NUMA
3044 /* 3192 /*
3045 * Must first have the slab cache available for the allocations of the 3193 * Must first have the slab cache available for the allocations of the
3046 * struct kmem_cache_node's. There is special bootstrap code in 3194 * struct kmem_cache_node's. There is special bootstrap code in
3047 * kmem_cache_open for slab_state == DOWN. 3195 * kmem_cache_open for slab_state == DOWN.
3048 */ 3196 */
3049 create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", 3197 kmem_cache_node = (void *)kmem_cache + kmalloc_size;
3050 sizeof(struct kmem_cache_node), GFP_NOWAIT); 3198
3051 kmalloc_caches[0].refcount = -1; 3199 kmem_cache_open(kmem_cache_node, "kmem_cache_node",
3052 caches++; 3200 sizeof(struct kmem_cache_node),
3201 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3053 3202
3054 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); 3203 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
3055#endif
3056 3204
3057 /* Able to allocate the per node structures */ 3205 /* Able to allocate the per node structures */
3058 slab_state = PARTIAL; 3206 slab_state = PARTIAL;
3059 3207
3060 /* Caches that are not of the two-to-the-power-of size */ 3208 temp_kmem_cache = kmem_cache;
3061 if (KMALLOC_MIN_SIZE <= 32) { 3209 kmem_cache_open(kmem_cache, "kmem_cache", kmem_size,
3062 create_kmalloc_cache(&kmalloc_caches[1], 3210 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3063 "kmalloc-96", 96, GFP_NOWAIT); 3211 kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3064 caches++; 3212 memcpy(kmem_cache, temp_kmem_cache, kmem_size);
3065 }
3066 if (KMALLOC_MIN_SIZE <= 64) {
3067 create_kmalloc_cache(&kmalloc_caches[2],
3068 "kmalloc-192", 192, GFP_NOWAIT);
3069 caches++;
3070 }
3071 3213
3072 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { 3214 /*
3073 create_kmalloc_cache(&kmalloc_caches[i], 3215 * Allocate kmem_cache_node properly from the kmem_cache slab.
3074 "kmalloc", 1 << i, GFP_NOWAIT); 3216 * kmem_cache_node is separately allocated so no need to
3075 caches++; 3217 * update any list pointers.
3076 } 3218 */
3219 temp_kmem_cache_node = kmem_cache_node;
3220
3221 kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3222 memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);
3223
3224 kmem_cache_bootstrap_fixup(kmem_cache_node);
3225
3226 caches++;
3227 kmem_cache_bootstrap_fixup(kmem_cache);
3228 caches++;
3229 /* Free temporary boot structure */
3230 free_pages((unsigned long)temp_kmem_cache, order);
3077 3231
3232 /* Now we can use the kmem_cache to allocate kmalloc slabs */
3078 3233
3079 /* 3234 /*
3080 * Patch up the size_index table if we have strange large alignment 3235 * Patch up the size_index table if we have strange large alignment
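The rewritten kmem_cache_init() above bootstraps the allocator by carving two temporary descriptors (kmem_cache and kmem_cache_node) out of pages taken directly from the page allocator, opening the caches there, then re-allocating both descriptors from the now-working kmem_cache slab, copying the contents over, fixing the page->slab back-pointers via kmem_cache_bootstrap_fixup(), and freeing the boot pages. A stripped-down user-space model of that "build it in a scratch buffer, then move it into itself" sequence, with malloc/free standing in for the page allocator and the slab, and the back-pointer fixup omitted:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct cache {
            char name[32];
            size_t object_size;
    };

    /* Stand-in for kmem_cache_open(): fill in a descriptor wherever it lives. */
    static void cache_open(struct cache *c, const char *name, size_t size)
    {
            snprintf(c->name, sizeof(c->name), "%s", name);
            c->object_size = size;
    }

    int main(void)
    {
            /* 1. Scratch area from the "page allocator": room for two descriptors. */
            struct cache *boot = malloc(2 * sizeof(struct cache));
            if (!boot)
                    return 1;

            struct cache *cache_cache = &boot[0];
            struct cache *cache_node  = &boot[1];

            cache_open(cache_node, "kmem_cache_node", 64);
            cache_open(cache_cache, "kmem_cache", sizeof(struct cache));

            /* 2. The allocator now works, so allocate the real descriptors from it
             *    (modelled with plain malloc) and copy the bootstrap contents over. */
            struct cache *real_cache = malloc(sizeof(*real_cache));
            struct cache *real_node  = malloc(sizeof(*real_node));
            if (!real_cache || !real_node)
                    return 1;
            memcpy(real_cache, cache_cache, sizeof(*real_cache));
            memcpy(real_node,  cache_node,  sizeof(*real_node));

            /* 3. Free the scratch pages; only the self-hosted copies remain. */
            free(boot);

            printf("%s and %s now live in properly allocated memory\n",
                   real_cache->name, real_node->name);
            free(real_cache);
            free(real_node);
            return 0;
    }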
@@ -3114,26 +3269,60 @@ void __init kmem_cache_init(void)
3114 size_index[size_index_elem(i)] = 8; 3269 size_index[size_index_elem(i)] = 8;
3115 } 3270 }
3116 3271
3272 /* Caches that are not of the two-to-the-power-of size */
3273 if (KMALLOC_MIN_SIZE <= 32) {
3274 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
3275 caches++;
3276 }
3277
3278 if (KMALLOC_MIN_SIZE <= 64) {
3279 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
3280 caches++;
3281 }
3282
3283 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3284 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
3285 caches++;
3286 }
3287
3117 slab_state = UP; 3288 slab_state = UP;
3118 3289
3119 /* Provide the correct kmalloc names now that the caches are up */ 3290 /* Provide the correct kmalloc names now that the caches are up */
3291 if (KMALLOC_MIN_SIZE <= 32) {
3292 kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
3293 BUG_ON(!kmalloc_caches[1]->name);
3294 }
3295
3296 if (KMALLOC_MIN_SIZE <= 64) {
3297 kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
3298 BUG_ON(!kmalloc_caches[2]->name);
3299 }
3300
3120 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { 3301 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3121 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); 3302 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
3122 3303
3123 BUG_ON(!s); 3304 BUG_ON(!s);
3124 kmalloc_caches[i].name = s; 3305 kmalloc_caches[i]->name = s;
3125 } 3306 }
3126 3307
3127#ifdef CONFIG_SMP 3308#ifdef CONFIG_SMP
3128 register_cpu_notifier(&slab_notifier); 3309 register_cpu_notifier(&slab_notifier);
3129#endif 3310#endif
3130#ifdef CONFIG_NUMA
3131 kmem_size = offsetof(struct kmem_cache, node) +
3132 nr_node_ids * sizeof(struct kmem_cache_node *);
3133#else
3134 kmem_size = sizeof(struct kmem_cache);
3135#endif
3136 3311
3312#ifdef CONFIG_ZONE_DMA
3313 for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
3314 struct kmem_cache *s = kmalloc_caches[i];
3315
3316 if (s && s->size) {
3317 char *name = kasprintf(GFP_NOWAIT,
3318 "dma-kmalloc-%d", s->objsize);
3319
3320 BUG_ON(!name);
3321 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3322 s->objsize, SLAB_CACHE_DMA);
3323 }
3324 }
3325#endif
3137 printk(KERN_INFO 3326 printk(KERN_INFO
3138 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," 3327 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
3139 " CPUs=%d, Nodes=%d\n", 3328 " CPUs=%d, Nodes=%d\n",
@@ -3211,6 +3400,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3211 size_t align, unsigned long flags, void (*ctor)(void *)) 3400 size_t align, unsigned long flags, void (*ctor)(void *))
3212{ 3401{
3213 struct kmem_cache *s; 3402 struct kmem_cache *s;
3403 char *n;
3214 3404
3215 if (WARN_ON(!name)) 3405 if (WARN_ON(!name))
3216 return NULL; 3406 return NULL;
@@ -3234,24 +3424,30 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3234 return s; 3424 return s;
3235 } 3425 }
3236 3426
3427 n = kstrdup(name, GFP_KERNEL);
3428 if (!n)
3429 goto err;
3430
3237 s = kmalloc(kmem_size, GFP_KERNEL); 3431 s = kmalloc(kmem_size, GFP_KERNEL);
3238 if (s) { 3432 if (s) {
3239 if (kmem_cache_open(s, GFP_KERNEL, name, 3433 if (kmem_cache_open(s, n,
3240 size, align, flags, ctor)) { 3434 size, align, flags, ctor)) {
3241 list_add(&s->list, &slab_caches); 3435 list_add(&s->list, &slab_caches);
3242 if (sysfs_slab_add(s)) { 3436 if (sysfs_slab_add(s)) {
3243 list_del(&s->list); 3437 list_del(&s->list);
3438 kfree(n);
3244 kfree(s); 3439 kfree(s);
3245 goto err; 3440 goto err;
3246 } 3441 }
3247 up_write(&slub_lock); 3442 up_write(&slub_lock);
3248 return s; 3443 return s;
3249 } 3444 }
3445 kfree(n);
3250 kfree(s); 3446 kfree(s);
3251 } 3447 }
3448err:
3252 up_write(&slub_lock); 3449 up_write(&slub_lock);
3253 3450
3254err:
3255 if (flags & SLAB_PANIC) 3451 if (flags & SLAB_PANIC)
3256 panic("Cannot create slabcache %s\n", name); 3452 panic("Cannot create slabcache %s\n", name);
3257 else 3453 else
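kmem_cache_create() now duplicates the caller's name so the cache owns its own copy: the copy is freed on every failure path here, and on success it is released later by the kfree(s->name) added to kmem_cache_release() near the end of this diff. A user-space sketch of that ownership pattern (names are invented; strdup/malloc model kstrdup/kmalloc):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct cache {
            char *name;    /* owned by the cache, freed on release */
            size_t size;
    };

    static struct cache *cache_create(const char *name, size_t size)
    {
            char *n = strdup(name);            /* kstrdup() stand-in */
            if (!n)
                    return NULL;

            struct cache *s = malloc(sizeof(*s));
            if (!s) {
                    free(n);                   /* mirrors the kfree(n) error path */
                    return NULL;
            }
            s->name = n;
            s->size = size;
            return s;
    }

    static void cache_release(struct cache *s)
    {
            free(s->name);                     /* mirrors kfree(s->name) */
            free(s);
    }

    int main(void)
    {
            struct cache *s = cache_create("my_cache", 64);
            if (!s)
                    return 1;
            printf("created %s (%zu bytes)\n", s->name, s->size);
            cache_release(s);
            return 0;
    }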
@@ -3312,12 +3508,13 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3312 3508
3313 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller); 3509 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
3314 3510
3315 /* Honor the call site pointer we recieved. */ 3511 /* Honor the call site pointer we received. */
3316 trace_kmalloc(caller, ret, size, s->size, gfpflags); 3512 trace_kmalloc(caller, ret, size, s->size, gfpflags);
3317 3513
3318 return ret; 3514 return ret;
3319} 3515}
3320 3516
3517#ifdef CONFIG_NUMA
3321void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, 3518void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3322 int node, unsigned long caller) 3519 int node, unsigned long caller)
3323{ 3520{
@@ -3341,13 +3538,14 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3341 3538
3342 ret = slab_alloc(s, gfpflags, node, caller); 3539 ret = slab_alloc(s, gfpflags, node, caller);
3343 3540
3344 /* Honor the call site pointer we recieved. */ 3541 /* Honor the call site pointer we received. */
3345 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); 3542 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
3346 3543
3347 return ret; 3544 return ret;
3348} 3545}
3546#endif
3349 3547
3350#ifdef CONFIG_SLUB_DEBUG 3548#ifdef CONFIG_SYSFS
3351static int count_inuse(struct page *page) 3549static int count_inuse(struct page *page)
3352{ 3550{
3353 return page->inuse; 3551 return page->inuse;
@@ -3357,7 +3555,9 @@ static int count_total(struct page *page)
3357{ 3555{
3358 return page->objects; 3556 return page->objects;
3359} 3557}
3558#endif
3360 3559
3560#ifdef CONFIG_SLUB_DEBUG
3361static int validate_slab(struct kmem_cache *s, struct page *page, 3561static int validate_slab(struct kmem_cache *s, struct page *page,
3362 unsigned long *map) 3562 unsigned long *map)
3363{ 3563{
@@ -3371,15 +3571,16 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
3371 /* Now we know that a valid freelist exists */ 3571 /* Now we know that a valid freelist exists */
3372 bitmap_zero(map, page->objects); 3572 bitmap_zero(map, page->objects);
3373 3573
3374 for_each_free_object(p, s, page->freelist) { 3574 get_map(s, page, map);
3375 set_bit(slab_index(p, s, addr), map); 3575 for_each_object(p, s, addr, page->objects) {
3376 if (!check_object(s, page, p, 0)) 3576 if (test_bit(slab_index(p, s, addr), map))
3377 return 0; 3577 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
3578 return 0;
3378 } 3579 }
3379 3580
3380 for_each_object(p, s, addr, page->objects) 3581 for_each_object(p, s, addr, page->objects)
3381 if (!test_bit(slab_index(p, s, addr), map)) 3582 if (!test_bit(slab_index(p, s, addr), map))
3382 if (!check_object(s, page, p, 1)) 3583 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
3383 return 0; 3584 return 0;
3384 return 1; 3585 return 1;
3385} 3586}
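validate_slab() now builds the free-object bitmap with the shared get_map() helper and then checks every object against the map: objects marked free are verified as SLUB_RED_INACTIVE, the rest as SLUB_RED_ACTIVE. A simplified user-space model of that bitmap-driven check, collapsing the two passes into one and using a plain bool array in place of the kernel bitmap:

    #include <stdbool.h>
    #include <stdio.h>

    #define NOBJECTS 8

    /* Pretend freelist: object indices that are currently free. */
    static const int free_objects[] = { 1, 4, 6 };

    int main(void)
    {
            bool is_free[NOBJECTS] = { false };   /* bitmap_zero() + get_map() model */

            for (unsigned i = 0; i < sizeof(free_objects) / sizeof(free_objects[0]); i++)
                    is_free[free_objects[i]] = true;

            /* Free objects must look "inactive", allocated ones "active". */
            for (int i = 0; i < NOBJECTS; i++) {
                    if (is_free[i])
                            printf("object %d: check red zone as INACTIVE\n", i);
                    else
                            printf("object %d: check red zone as ACTIVE\n", i);
            }
            return 0;
    }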
@@ -3448,65 +3649,6 @@ static long validate_slab_cache(struct kmem_cache *s)
3448 kfree(map); 3649 kfree(map);
3449 return count; 3650 return count;
3450} 3651}
3451
3452#ifdef SLUB_RESILIENCY_TEST
3453static void resiliency_test(void)
3454{
3455 u8 *p;
3456
3457 printk(KERN_ERR "SLUB resiliency testing\n");
3458 printk(KERN_ERR "-----------------------\n");
3459 printk(KERN_ERR "A. Corruption after allocation\n");
3460
3461 p = kzalloc(16, GFP_KERNEL);
3462 p[16] = 0x12;
3463 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
3464 " 0x12->0x%p\n\n", p + 16);
3465
3466 validate_slab_cache(kmalloc_caches + 4);
3467
3468 /* Hmmm... The next two are dangerous */
3469 p = kzalloc(32, GFP_KERNEL);
3470 p[32 + sizeof(void *)] = 0x34;
3471 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
3472 " 0x34 -> -0x%p\n", p);
3473 printk(KERN_ERR
3474 "If allocated object is overwritten then not detectable\n\n");
3475
3476 validate_slab_cache(kmalloc_caches + 5);
3477 p = kzalloc(64, GFP_KERNEL);
3478 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
3479 *p = 0x56;
3480 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
3481 p);
3482 printk(KERN_ERR
3483 "If allocated object is overwritten then not detectable\n\n");
3484 validate_slab_cache(kmalloc_caches + 6);
3485
3486 printk(KERN_ERR "\nB. Corruption after free\n");
3487 p = kzalloc(128, GFP_KERNEL);
3488 kfree(p);
3489 *p = 0x78;
3490 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
3491 validate_slab_cache(kmalloc_caches + 7);
3492
3493 p = kzalloc(256, GFP_KERNEL);
3494 kfree(p);
3495 p[50] = 0x9a;
3496 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
3497 p);
3498 validate_slab_cache(kmalloc_caches + 8);
3499
3500 p = kzalloc(512, GFP_KERNEL);
3501 kfree(p);
3502 p[512] = 0xab;
3503 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
3504 validate_slab_cache(kmalloc_caches + 9);
3505}
3506#else
3507static void resiliency_test(void) {};
3508#endif
3509
3510/* 3652/*
3511 * Generate lists of code addresses where slabcache objects are allocated 3653 * Generate lists of code addresses where slabcache objects are allocated
3512 * and freed. 3654 * and freed.
@@ -3635,14 +3777,13 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
3635 3777
3636static void process_slab(struct loc_track *t, struct kmem_cache *s, 3778static void process_slab(struct loc_track *t, struct kmem_cache *s,
3637 struct page *page, enum track_item alloc, 3779 struct page *page, enum track_item alloc,
3638 long *map) 3780 unsigned long *map)
3639{ 3781{
3640 void *addr = page_address(page); 3782 void *addr = page_address(page);
3641 void *p; 3783 void *p;
3642 3784
3643 bitmap_zero(map, page->objects); 3785 bitmap_zero(map, page->objects);
3644 for_each_free_object(p, s, page->freelist) 3786 get_map(s, page, map);
3645 set_bit(slab_index(p, s, addr), map);
3646 3787
3647 for_each_object(p, s, addr, page->objects) 3788 for_each_object(p, s, addr, page->objects)
3648 if (!test_bit(slab_index(p, s, addr), map)) 3789 if (!test_bit(slab_index(p, s, addr), map))
@@ -3691,7 +3832,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
3691 len += sprintf(buf + len, "%7ld ", l->count); 3832 len += sprintf(buf + len, "%7ld ", l->count);
3692 3833
3693 if (l->addr) 3834 if (l->addr)
3694 len += sprint_symbol(buf + len, (unsigned long)l->addr); 3835 len += sprintf(buf + len, "%pS", (void *)l->addr);
3695 else 3836 else
3696 len += sprintf(buf + len, "<not-available>"); 3837 len += sprintf(buf + len, "<not-available>");
3697 3838
@@ -3735,7 +3876,71 @@ static int list_locations(struct kmem_cache *s, char *buf,
3735 len += sprintf(buf, "No data\n"); 3876 len += sprintf(buf, "No data\n");
3736 return len; 3877 return len;
3737} 3878}
3879#endif
3880
3881#ifdef SLUB_RESILIENCY_TEST
3882static void resiliency_test(void)
3883{
3884 u8 *p;
3885
3886 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);
3887
3888 printk(KERN_ERR "SLUB resiliency testing\n");
3889 printk(KERN_ERR "-----------------------\n");
3890 printk(KERN_ERR "A. Corruption after allocation\n");
3891
3892 p = kzalloc(16, GFP_KERNEL);
3893 p[16] = 0x12;
3894 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
3895 " 0x12->0x%p\n\n", p + 16);
3896
3897 validate_slab_cache(kmalloc_caches[4]);
3898
3899 /* Hmmm... The next two are dangerous */
3900 p = kzalloc(32, GFP_KERNEL);
3901 p[32 + sizeof(void *)] = 0x34;
3902 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
3903 " 0x34 -> -0x%p\n", p);
3904 printk(KERN_ERR
3905 "If allocated object is overwritten then not detectable\n\n");
3906
3907 validate_slab_cache(kmalloc_caches[5]);
3908 p = kzalloc(64, GFP_KERNEL);
3909 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
3910 *p = 0x56;
3911 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
3912 p);
3913 printk(KERN_ERR
3914 "If allocated object is overwritten then not detectable\n\n");
3915 validate_slab_cache(kmalloc_caches[6]);
3916
3917 printk(KERN_ERR "\nB. Corruption after free\n");
3918 p = kzalloc(128, GFP_KERNEL);
3919 kfree(p);
3920 *p = 0x78;
3921 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
3922 validate_slab_cache(kmalloc_caches[7]);
3923
3924 p = kzalloc(256, GFP_KERNEL);
3925 kfree(p);
3926 p[50] = 0x9a;
3927 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
3928 p);
3929 validate_slab_cache(kmalloc_caches[8]);
3930
3931 p = kzalloc(512, GFP_KERNEL);
3932 kfree(p);
3933 p[512] = 0xab;
3934 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
3935 validate_slab_cache(kmalloc_caches[9]);
3936}
3937#else
3938#ifdef CONFIG_SYSFS
3939static void resiliency_test(void) {};
3940#endif
3941#endif
3738 3942
3943#ifdef CONFIG_SYSFS
3739enum slab_stat_type { 3944enum slab_stat_type {
3740 SL_ALL, /* All slabs */ 3945 SL_ALL, /* All slabs */
3741 SL_PARTIAL, /* Only partially allocated slabs */ 3946 SL_PARTIAL, /* Only partially allocated slabs */
@@ -3788,6 +3993,8 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
3788 } 3993 }
3789 } 3994 }
3790 3995
3996 lock_memory_hotplug();
3997#ifdef CONFIG_SLUB_DEBUG
3791 if (flags & SO_ALL) { 3998 if (flags & SO_ALL) {
3792 for_each_node_state(node, N_NORMAL_MEMORY) { 3999 for_each_node_state(node, N_NORMAL_MEMORY) {
3793 struct kmem_cache_node *n = get_node(s, node); 4000 struct kmem_cache_node *n = get_node(s, node);
@@ -3804,7 +4011,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
3804 nodes[node] += x; 4011 nodes[node] += x;
3805 } 4012 }
3806 4013
3807 } else if (flags & SO_PARTIAL) { 4014 } else
4015#endif
4016 if (flags & SO_PARTIAL) {
3808 for_each_node_state(node, N_NORMAL_MEMORY) { 4017 for_each_node_state(node, N_NORMAL_MEMORY) {
3809 struct kmem_cache_node *n = get_node(s, node); 4018 struct kmem_cache_node *n = get_node(s, node);
3810 4019
@@ -3825,10 +4034,12 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
3825 x += sprintf(buf + x, " N%d=%lu", 4034 x += sprintf(buf + x, " N%d=%lu",
3826 node, nodes[node]); 4035 node, nodes[node]);
3827#endif 4036#endif
4037 unlock_memory_hotplug();
3828 kfree(nodes); 4038 kfree(nodes);
3829 return x + sprintf(buf + x, "\n"); 4039 return x + sprintf(buf + x, "\n");
3830} 4040}
3831 4041
4042#ifdef CONFIG_SLUB_DEBUG
3832static int any_slab_objects(struct kmem_cache *s) 4043static int any_slab_objects(struct kmem_cache *s)
3833{ 4044{
3834 int node; 4045 int node;
@@ -3844,6 +4055,7 @@ static int any_slab_objects(struct kmem_cache *s)
3844 } 4055 }
3845 return 0; 4056 return 0;
3846} 4057}
4058#endif
3847 4059
3848#define to_slab_attr(n) container_of(n, struct slab_attribute, attr) 4060#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
3849#define to_slab(n) container_of(n, struct kmem_cache, kobj); 4061#define to_slab(n) container_of(n, struct kmem_cache, kobj);
@@ -3930,12 +4142,9 @@ SLAB_ATTR(min_partial);
3930 4142
3931static ssize_t ctor_show(struct kmem_cache *s, char *buf) 4143static ssize_t ctor_show(struct kmem_cache *s, char *buf)
3932{ 4144{
3933 if (s->ctor) { 4145 if (!s->ctor)
3934 int n = sprint_symbol(buf, (unsigned long)s->ctor); 4146 return 0;
3935 4147 return sprintf(buf, "%pS\n", s->ctor);
3936 return n + sprintf(buf + n, "\n");
3937 }
3938 return 0;
3939} 4148}
3940SLAB_ATTR_RO(ctor); 4149SLAB_ATTR_RO(ctor);
3941 4150
@@ -3945,12 +4154,6 @@ static ssize_t aliases_show(struct kmem_cache *s, char *buf)
3945} 4154}
3946SLAB_ATTR_RO(aliases); 4155SLAB_ATTR_RO(aliases);
3947 4156
3948static ssize_t slabs_show(struct kmem_cache *s, char *buf)
3949{
3950 return show_slab_objects(s, buf, SO_ALL);
3951}
3952SLAB_ATTR_RO(slabs);
3953
3954static ssize_t partial_show(struct kmem_cache *s, char *buf) 4157static ssize_t partial_show(struct kmem_cache *s, char *buf)
3955{ 4158{
3956 return show_slab_objects(s, buf, SO_PARTIAL); 4159 return show_slab_objects(s, buf, SO_PARTIAL);
@@ -3975,93 +4178,89 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
3975} 4178}
3976SLAB_ATTR_RO(objects_partial); 4179SLAB_ATTR_RO(objects_partial);
3977 4180
3978static ssize_t total_objects_show(struct kmem_cache *s, char *buf) 4181static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
3979{
3980 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
3981}
3982SLAB_ATTR_RO(total_objects);
3983
3984static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
3985{ 4182{
3986 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); 4183 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
3987} 4184}
3988 4185
3989static ssize_t sanity_checks_store(struct kmem_cache *s, 4186static ssize_t reclaim_account_store(struct kmem_cache *s,
3990 const char *buf, size_t length) 4187 const char *buf, size_t length)
3991{ 4188{
3992 s->flags &= ~SLAB_DEBUG_FREE; 4189 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
3993 if (buf[0] == '1') 4190 if (buf[0] == '1')
3994 s->flags |= SLAB_DEBUG_FREE; 4191 s->flags |= SLAB_RECLAIM_ACCOUNT;
3995 return length; 4192 return length;
3996} 4193}
3997SLAB_ATTR(sanity_checks); 4194SLAB_ATTR(reclaim_account);
3998 4195
3999static ssize_t trace_show(struct kmem_cache *s, char *buf) 4196static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4000{ 4197{
4001 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE)); 4198 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4002} 4199}
4200SLAB_ATTR_RO(hwcache_align);
4003 4201
4004static ssize_t trace_store(struct kmem_cache *s, const char *buf, 4202#ifdef CONFIG_ZONE_DMA
4005 size_t length) 4203static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4006{ 4204{
4007 s->flags &= ~SLAB_TRACE; 4205 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4008 if (buf[0] == '1')
4009 s->flags |= SLAB_TRACE;
4010 return length;
4011} 4206}
4012SLAB_ATTR(trace); 4207SLAB_ATTR_RO(cache_dma);
4208#endif
4013 4209
4014#ifdef CONFIG_FAILSLAB 4210static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4015static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4016{ 4211{
4017 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); 4212 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4018} 4213}
4214SLAB_ATTR_RO(destroy_by_rcu);
4019 4215
4020static ssize_t failslab_store(struct kmem_cache *s, const char *buf, 4216static ssize_t reserved_show(struct kmem_cache *s, char *buf)
4021 size_t length)
4022{ 4217{
4023 s->flags &= ~SLAB_FAILSLAB; 4218 return sprintf(buf, "%d\n", s->reserved);
4024 if (buf[0] == '1')
4025 s->flags |= SLAB_FAILSLAB;
4026 return length;
4027} 4219}
4028SLAB_ATTR(failslab); 4220SLAB_ATTR_RO(reserved);
4029#endif
4030 4221
4031static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) 4222#ifdef CONFIG_SLUB_DEBUG
4223static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4032{ 4224{
4033 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); 4225 return show_slab_objects(s, buf, SO_ALL);
4034} 4226}
4227SLAB_ATTR_RO(slabs);
4035 4228
4036static ssize_t reclaim_account_store(struct kmem_cache *s, 4229static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4037 const char *buf, size_t length)
4038{ 4230{
4039 s->flags &= ~SLAB_RECLAIM_ACCOUNT; 4231 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4040 if (buf[0] == '1')
4041 s->flags |= SLAB_RECLAIM_ACCOUNT;
4042 return length;
4043} 4232}
4044SLAB_ATTR(reclaim_account); 4233SLAB_ATTR_RO(total_objects);
4045 4234
4046static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf) 4235static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4047{ 4236{
4048 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN)); 4237 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4049} 4238}
4050SLAB_ATTR_RO(hwcache_align);
4051 4239
4052#ifdef CONFIG_ZONE_DMA 4240static ssize_t sanity_checks_store(struct kmem_cache *s,
4053static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) 4241 const char *buf, size_t length)
4054{ 4242{
4055 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA)); 4243 s->flags &= ~SLAB_DEBUG_FREE;
4244 if (buf[0] == '1')
4245 s->flags |= SLAB_DEBUG_FREE;
4246 return length;
4056} 4247}
4057SLAB_ATTR_RO(cache_dma); 4248SLAB_ATTR(sanity_checks);
4058#endif
4059 4249
4060static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) 4250static ssize_t trace_show(struct kmem_cache *s, char *buf)
4061{ 4251{
4062 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU)); 4252 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4063} 4253}
4064SLAB_ATTR_RO(destroy_by_rcu); 4254
4255static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4256 size_t length)
4257{
4258 s->flags &= ~SLAB_TRACE;
4259 if (buf[0] == '1')
4260 s->flags |= SLAB_TRACE;
4261 return length;
4262}
4263SLAB_ATTR(trace);
4065 4264
4066static ssize_t red_zone_show(struct kmem_cache *s, char *buf) 4265static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4067{ 4266{
@@ -4139,6 +4338,40 @@ static ssize_t validate_store(struct kmem_cache *s,
4139} 4338}
4140SLAB_ATTR(validate); 4339SLAB_ATTR(validate);
4141 4340
4341static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4342{
4343 if (!(s->flags & SLAB_STORE_USER))
4344 return -ENOSYS;
4345 return list_locations(s, buf, TRACK_ALLOC);
4346}
4347SLAB_ATTR_RO(alloc_calls);
4348
4349static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4350{
4351 if (!(s->flags & SLAB_STORE_USER))
4352 return -ENOSYS;
4353 return list_locations(s, buf, TRACK_FREE);
4354}
4355SLAB_ATTR_RO(free_calls);
4356#endif /* CONFIG_SLUB_DEBUG */
4357
4358#ifdef CONFIG_FAILSLAB
4359static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4360{
4361 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4362}
4363
4364static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4365 size_t length)
4366{
4367 s->flags &= ~SLAB_FAILSLAB;
4368 if (buf[0] == '1')
4369 s->flags |= SLAB_FAILSLAB;
4370 return length;
4371}
4372SLAB_ATTR(failslab);
4373#endif
4374
4142static ssize_t shrink_show(struct kmem_cache *s, char *buf) 4375static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4143{ 4376{
4144 return 0; 4377 return 0;
@@ -4158,22 +4391,6 @@ static ssize_t shrink_store(struct kmem_cache *s,
4158} 4391}
4159SLAB_ATTR(shrink); 4392SLAB_ATTR(shrink);
4160 4393
4161static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4162{
4163 if (!(s->flags & SLAB_STORE_USER))
4164 return -ENOSYS;
4165 return list_locations(s, buf, TRACK_ALLOC);
4166}
4167SLAB_ATTR_RO(alloc_calls);
4168
4169static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4170{
4171 if (!(s->flags & SLAB_STORE_USER))
4172 return -ENOSYS;
4173 return list_locations(s, buf, TRACK_FREE);
4174}
4175SLAB_ATTR_RO(free_calls);
4176
4177#ifdef CONFIG_NUMA 4394#ifdef CONFIG_NUMA
4178static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) 4395static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4179{ 4396{
@@ -4279,25 +4496,28 @@ static struct attribute *slab_attrs[] = {
4279 &min_partial_attr.attr, 4496 &min_partial_attr.attr,
4280 &objects_attr.attr, 4497 &objects_attr.attr,
4281 &objects_partial_attr.attr, 4498 &objects_partial_attr.attr,
4282 &total_objects_attr.attr,
4283 &slabs_attr.attr,
4284 &partial_attr.attr, 4499 &partial_attr.attr,
4285 &cpu_slabs_attr.attr, 4500 &cpu_slabs_attr.attr,
4286 &ctor_attr.attr, 4501 &ctor_attr.attr,
4287 &aliases_attr.attr, 4502 &aliases_attr.attr,
4288 &align_attr.attr, 4503 &align_attr.attr,
4289 &sanity_checks_attr.attr,
4290 &trace_attr.attr,
4291 &hwcache_align_attr.attr, 4504 &hwcache_align_attr.attr,
4292 &reclaim_account_attr.attr, 4505 &reclaim_account_attr.attr,
4293 &destroy_by_rcu_attr.attr, 4506 &destroy_by_rcu_attr.attr,
4507 &shrink_attr.attr,
4508 &reserved_attr.attr,
4509#ifdef CONFIG_SLUB_DEBUG
4510 &total_objects_attr.attr,
4511 &slabs_attr.attr,
4512 &sanity_checks_attr.attr,
4513 &trace_attr.attr,
4294 &red_zone_attr.attr, 4514 &red_zone_attr.attr,
4295 &poison_attr.attr, 4515 &poison_attr.attr,
4296 &store_user_attr.attr, 4516 &store_user_attr.attr,
4297 &validate_attr.attr, 4517 &validate_attr.attr,
4298 &shrink_attr.attr,
4299 &alloc_calls_attr.attr, 4518 &alloc_calls_attr.attr,
4300 &free_calls_attr.attr, 4519 &free_calls_attr.attr,
4520#endif
4301#ifdef CONFIG_ZONE_DMA 4521#ifdef CONFIG_ZONE_DMA
4302 &cache_dma_attr.attr, 4522 &cache_dma_attr.attr,
4303#endif 4523#endif
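The slab_attrs[] reshuffle above groups the debug-only sysfs files (total_objects, slabs, sanity_checks, trace, red_zone, poison, store_user, validate, alloc_calls, free_calls) under #ifdef CONFIG_SLUB_DEBUG, so a CONFIG_SYSFS-only build still exposes the basic attributes. A user-space sketch of such a compile-time-pruned attribute table, with invented attribute names and a NULL-terminated array of show callbacks:

    #include <stdio.h>

    #define DEBUG_BUILD 1            /* stand-in for CONFIG_SLUB_DEBUG */

    struct attribute {
            const char *name;
            int (*show)(char *buf, size_t len);
    };

    static int objects_show(char *buf, size_t len)  { return snprintf(buf, len, "42\n"); }
    static int validate_show(char *buf, size_t len) { return snprintf(buf, len, "0\n");  }

    static const struct attribute attrs[] = {
            { "objects",  objects_show  },
    #if DEBUG_BUILD
            { "validate", validate_show },   /* only present in debug builds */
    #endif
            { NULL, NULL }
    };

    int main(void)
    {
            char buf[16];

            for (const struct attribute *a = attrs; a->name; a++) {
                    a->show(buf, sizeof(buf));
                    printf("%s: %s", a->name, buf);
            }
            return 0;
    }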
@@ -4377,6 +4597,7 @@ static void kmem_cache_release(struct kobject *kobj)
4377{ 4597{
4378 struct kmem_cache *s = to_slab(kobj); 4598 struct kmem_cache *s = to_slab(kobj);
4379 4599
4600 kfree(s->name);
4380 kfree(s); 4601 kfree(s);
4381} 4602}
4382 4603
@@ -4579,7 +4800,7 @@ static int __init slab_sysfs_init(void)
4579} 4800}
4580 4801
4581__initcall(slab_sysfs_init); 4802__initcall(slab_sysfs_init);
4582#endif 4803#endif /* CONFIG_SYSFS */
4583 4804
4584/* 4805/*
4585 * The /proc/slabinfo ABI 4806 * The /proc/slabinfo ABI