Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 1283 |
1 file changed, 752 insertions, 531 deletions
@@ -28,6 +28,8 @@ | |||
28 | #include <linux/math64.h> | 28 | #include <linux/math64.h> |
29 | #include <linux/fault-inject.h> | 29 | #include <linux/fault-inject.h> |
30 | 30 | ||
31 | #include <trace/events/kmem.h> | ||
32 | |||
31 | /* | 33 | /* |
32 | * Lock order: | 34 | * Lock order: |
33 | * 1. slab_lock(page) | 35 | * 1. slab_lock(page) |
@@ -62,7 +64,7 @@ | |||
62 | * we must stay away from it for a while since we may cause a bouncing | 64 | * we must stay away from it for a while since we may cause a bouncing |
63 | * cacheline if we try to acquire the lock. So go onto the next slab. | 65 | * cacheline if we try to acquire the lock. So go onto the next slab. |
64 | * If all pages are busy then we may allocate a new slab instead of reusing | 66 | * If all pages are busy then we may allocate a new slab instead of reusing |
65 | * a partial slab. A new slab has noone operating on it and thus there is | 67 | * a partial slab. A new slab has no one operating on it and thus there is |
66 | * no danger of cacheline contention. | 68 | * no danger of cacheline contention. |
67 | * | 69 | * |
68 | * Interrupts are disabled during allocation and deallocation in order to | 70 | * Interrupts are disabled during allocation and deallocation in order to |
@@ -168,7 +170,6 @@ static inline int kmem_cache_debug(struct kmem_cache *s) | |||
168 | 170 | ||
169 | /* Internal SLUB flags */ | 171 | /* Internal SLUB flags */ |
170 | #define __OBJECT_POISON 0x80000000UL /* Poison object */ | 172 | #define __OBJECT_POISON 0x80000000UL /* Poison object */ |
171 | #define __SYSFS_ADD_DEFERRED 0x40000000UL /* Not yet visible via sysfs */ | ||
172 | 173 | ||
173 | static int kmem_size = sizeof(struct kmem_cache); | 174 | static int kmem_size = sizeof(struct kmem_cache); |
174 | 175 | ||
@@ -178,7 +179,7 @@ static struct notifier_block slab_notifier; | |||
178 | 179 | ||
179 | static enum { | 180 | static enum { |
180 | DOWN, /* No slab functionality available */ | 181 | DOWN, /* No slab functionality available */ |
181 | PARTIAL, /* kmem_cache_open() works but kmalloc does not */ | 182 | PARTIAL, /* Kmem_cache_node works */ |
182 | UP, /* Everything works but does not show up in sysfs */ | 183 | UP, /* Everything works but does not show up in sysfs */ |
183 | SYSFS /* Sysfs up */ | 184 | SYSFS /* Sysfs up */ |
184 | } slab_state = DOWN; | 185 | } slab_state = DOWN; |
@@ -199,7 +200,7 @@ struct track { | |||
199 | 200 | ||
200 | enum track_item { TRACK_ALLOC, TRACK_FREE }; | 201 | enum track_item { TRACK_ALLOC, TRACK_FREE }; |
201 | 202 | ||
202 | #ifdef CONFIG_SLUB_DEBUG | 203 | #ifdef CONFIG_SYSFS |
203 | static int sysfs_slab_add(struct kmem_cache *); | 204 | static int sysfs_slab_add(struct kmem_cache *); |
204 | static int sysfs_slab_alias(struct kmem_cache *, const char *); | 205 | static int sysfs_slab_alias(struct kmem_cache *, const char *); |
205 | static void sysfs_slab_remove(struct kmem_cache *); | 206 | static void sysfs_slab_remove(struct kmem_cache *); |
@@ -210,12 +211,13 @@ static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) | |||
210 | { return 0; } | 211 | { return 0; } |
211 | static inline void sysfs_slab_remove(struct kmem_cache *s) | 212 | static inline void sysfs_slab_remove(struct kmem_cache *s) |
212 | { | 213 | { |
214 | kfree(s->name); | ||
213 | kfree(s); | 215 | kfree(s); |
214 | } | 216 | } |
215 | 217 | ||
216 | #endif | 218 | #endif |
217 | 219 | ||
218 | static inline void stat(struct kmem_cache *s, enum stat_item si) | 220 | static inline void stat(const struct kmem_cache *s, enum stat_item si) |
219 | { | 221 | { |
220 | #ifdef CONFIG_SLUB_STATS | 222 | #ifdef CONFIG_SLUB_STATS |
221 | __this_cpu_inc(s->cpu_slab->stat[si]); | 223 | __this_cpu_inc(s->cpu_slab->stat[si]); |
@@ -233,11 +235,7 @@ int slab_is_available(void) | |||
233 | 235 | ||
234 | static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) | 236 | static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) |
235 | { | 237 | { |
236 | #ifdef CONFIG_NUMA | ||
237 | return s->node[node]; | 238 | return s->node[node]; |
238 | #else | ||
239 | return &s->local_node; | ||
240 | #endif | ||
241 | } | 239 | } |
242 | 240 | ||
243 | /* Verify that a pointer has an address that is valid within a slab page */ | 241 | /* Verify that a pointer has an address that is valid within a slab page */ |
@@ -263,6 +261,18 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object) | |||
263 | return *(void **)(object + s->offset); | 261 | return *(void **)(object + s->offset); |
264 | } | 262 | } |
265 | 263 | ||
264 | static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) | ||
265 | { | ||
266 | void *p; | ||
267 | |||
268 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
269 | probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p)); | ||
270 | #else | ||
271 | p = get_freepointer(s, object); | ||
272 | #endif | ||
273 | return p; | ||
274 | } | ||
275 | |||
266 | static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) | 276 | static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) |
267 | { | 277 | { |
268 | *(void **)(object + s->offset) = fp; | 278 | *(void **)(object + s->offset) = fp; |
@@ -273,21 +283,46 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) | |||
273 | for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ | 283 | for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\ |
274 | __p += (__s)->size) | 284 | __p += (__s)->size) |
275 | 285 | ||
276 | /* Scan freelist */ | ||
277 | #define for_each_free_object(__p, __s, __free) \ | ||
278 | for (__p = (__free); __p; __p = get_freepointer((__s), __p)) | ||
279 | |||
280 | /* Determine object index from a given position */ | 286 | /* Determine object index from a given position */ |
281 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | 287 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) |
282 | { | 288 | { |
283 | return (p - addr) / s->size; | 289 | return (p - addr) / s->size; |
284 | } | 290 | } |
285 | 291 | ||
292 | static inline size_t slab_ksize(const struct kmem_cache *s) | ||
293 | { | ||
294 | #ifdef CONFIG_SLUB_DEBUG | ||
295 | /* | ||
296 | * Debugging requires use of the padding between object | ||
297 | * and whatever may come after it. | ||
298 | */ | ||
299 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | ||
300 | return s->objsize; | ||
301 | |||
302 | #endif | ||
303 | /* | ||
304 | * If we have the need to store the freelist pointer | ||
305 | * back there or track user information then we can | ||
306 | * only use the space before that information. | ||
307 | */ | ||
308 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) | ||
309 | return s->inuse; | ||
310 | /* | ||
311 | * Else we can use all the padding etc for the allocation | ||
312 | */ | ||
313 | return s->size; | ||
314 | } | ||
315 | |||
316 | static inline int order_objects(int order, unsigned long size, int reserved) | ||
317 | { | ||
318 | return ((PAGE_SIZE << order) - reserved) / size; | ||
319 | } | ||
320 | |||
286 | static inline struct kmem_cache_order_objects oo_make(int order, | 321 | static inline struct kmem_cache_order_objects oo_make(int order, |
287 | unsigned long size) | 322 | unsigned long size, int reserved) |
288 | { | 323 | { |
289 | struct kmem_cache_order_objects x = { | 324 | struct kmem_cache_order_objects x = { |
290 | (order << OO_SHIFT) + (PAGE_SIZE << order) / size | 325 | (order << OO_SHIFT) + order_objects(order, size, reserved) |
291 | }; | 326 | }; |
292 | 327 | ||
293 | return x; | 328 | return x; |
@@ -305,6 +340,21 @@ static inline int oo_objects(struct kmem_cache_order_objects x) | |||
305 | 340 | ||
306 | #ifdef CONFIG_SLUB_DEBUG | 341 | #ifdef CONFIG_SLUB_DEBUG |
307 | /* | 342 | /* |
343 | * Determine a map of objects in use on a page. | ||
344 | * | ||
345 | * Slab lock or node listlock must be held to guarantee that the page does | ||
346 | * not vanish from under us. | ||
347 | */ | ||
348 | static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) | ||
349 | { | ||
350 | void *p; | ||
351 | void *addr = page_address(page); | ||
352 | |||
353 | for (p = page->freelist; p; p = get_freepointer(s, p)) | ||
354 | set_bit(slab_index(p, s, addr), map); | ||
355 | } | ||
356 | |||
357 | /* | ||
308 | * Debug settings: | 358 | * Debug settings: |
309 | */ | 359 | */ |
310 | #ifdef CONFIG_SLUB_DEBUG_ON | 360 | #ifdef CONFIG_SLUB_DEBUG_ON |
@@ -494,7 +544,7 @@ static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...) | |||
494 | dump_stack(); | 544 | dump_stack(); |
495 | } | 545 | } |
496 | 546 | ||
497 | static void init_object(struct kmem_cache *s, void *object, int active) | 547 | static void init_object(struct kmem_cache *s, void *object, u8 val) |
498 | { | 548 | { |
499 | u8 *p = object; | 549 | u8 *p = object; |
500 | 550 | ||
@@ -504,9 +554,7 @@ static void init_object(struct kmem_cache *s, void *object, int active) | |||
504 | } | 554 | } |
505 | 555 | ||
506 | if (s->flags & SLAB_RED_ZONE) | 556 | if (s->flags & SLAB_RED_ZONE) |
507 | memset(p + s->objsize, | 557 | memset(p + s->objsize, val, s->inuse - s->objsize); |
508 | active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE, | ||
509 | s->inuse - s->objsize); | ||
510 | } | 558 | } |
511 | 559 | ||
512 | static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes) | 560 | static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes) |
@@ -621,7 +669,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) | |||
621 | return 1; | 669 | return 1; |
622 | 670 | ||
623 | start = page_address(page); | 671 | start = page_address(page); |
624 | length = (PAGE_SIZE << compound_order(page)); | 672 | length = (PAGE_SIZE << compound_order(page)) - s->reserved; |
625 | end = start + length; | 673 | end = start + length; |
626 | remainder = length % s->size; | 674 | remainder = length % s->size; |
627 | if (!remainder) | 675 | if (!remainder) |
@@ -641,17 +689,14 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) | |||
641 | } | 689 | } |
642 | 690 | ||
643 | static int check_object(struct kmem_cache *s, struct page *page, | 691 | static int check_object(struct kmem_cache *s, struct page *page, |
644 | void *object, int active) | 692 | void *object, u8 val) |
645 | { | 693 | { |
646 | u8 *p = object; | 694 | u8 *p = object; |
647 | u8 *endobject = object + s->objsize; | 695 | u8 *endobject = object + s->objsize; |
648 | 696 | ||
649 | if (s->flags & SLAB_RED_ZONE) { | 697 | if (s->flags & SLAB_RED_ZONE) { |
650 | unsigned int red = | ||
651 | active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE; | ||
652 | |||
653 | if (!check_bytes_and_report(s, page, object, "Redzone", | 698 | if (!check_bytes_and_report(s, page, object, "Redzone", |
654 | endobject, red, s->inuse - s->objsize)) | 699 | endobject, val, s->inuse - s->objsize)) |
655 | return 0; | 700 | return 0; |
656 | } else { | 701 | } else { |
657 | if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { | 702 | if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { |
@@ -661,7 +706,7 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
661 | } | 706 | } |
662 | 707 | ||
663 | if (s->flags & SLAB_POISON) { | 708 | if (s->flags & SLAB_POISON) { |
664 | if (!active && (s->flags & __OBJECT_POISON) && | 709 | if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && |
665 | (!check_bytes_and_report(s, page, p, "Poison", p, | 710 | (!check_bytes_and_report(s, page, p, "Poison", p, |
666 | POISON_FREE, s->objsize - 1) || | 711 | POISON_FREE, s->objsize - 1) || |
667 | !check_bytes_and_report(s, page, p, "Poison", | 712 | !check_bytes_and_report(s, page, p, "Poison", |
@@ -673,7 +718,7 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
673 | check_pad_bytes(s, page, p); | 718 | check_pad_bytes(s, page, p); |
674 | } | 719 | } |
675 | 720 | ||
676 | if (!s->offset && active) | 721 | if (!s->offset && val == SLUB_RED_ACTIVE) |
677 | /* | 722 | /* |
678 | * Object and freepointer overlap. Cannot check | 723 | * Object and freepointer overlap. Cannot check |
679 | * freepointer while object is allocated. | 724 | * freepointer while object is allocated. |
@@ -705,7 +750,7 @@ static int check_slab(struct kmem_cache *s, struct page *page) | |||
705 | return 0; | 750 | return 0; |
706 | } | 751 | } |
707 | 752 | ||
708 | maxobj = (PAGE_SIZE << compound_order(page)) / s->size; | 753 | maxobj = order_objects(compound_order(page), s->size, s->reserved); |
709 | if (page->objects > maxobj) { | 754 | if (page->objects > maxobj) { |
710 | slab_err(s, page, "objects %u > max %u", | 755 | slab_err(s, page, "objects %u > max %u", |
711 | s->name, page->objects, maxobj); | 756 | s->name, page->objects, maxobj); |
@@ -755,7 +800,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) | |||
755 | nr++; | 800 | nr++; |
756 | } | 801 | } |
757 | 802 | ||
758 | max_objects = (PAGE_SIZE << compound_order(page)) / s->size; | 803 | max_objects = order_objects(compound_order(page), s->size, s->reserved); |
759 | if (max_objects > MAX_OBJS_PER_PAGE) | 804 | if (max_objects > MAX_OBJS_PER_PAGE) |
760 | max_objects = MAX_OBJS_PER_PAGE; | 805 | max_objects = MAX_OBJS_PER_PAGE; |
761 | 806 | ||
@@ -792,6 +837,49 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, | |||
792 | } | 837 | } |
793 | 838 | ||
794 | /* | 839 | /* |
840 | * Hooks for other subsystems that check memory allocations. In a typical | ||
841 | * production configuration these hooks should all produce no code at all. | ||
842 | */ | ||
843 | static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) | ||
844 | { | ||
845 | flags &= gfp_allowed_mask; | ||
846 | lockdep_trace_alloc(flags); | ||
847 | might_sleep_if(flags & __GFP_WAIT); | ||
848 | |||
849 | return should_failslab(s->objsize, flags, s->flags); | ||
850 | } | ||
851 | |||
852 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) | ||
853 | { | ||
854 | flags &= gfp_allowed_mask; | ||
855 | kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); | ||
856 | kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); | ||
857 | } | ||
858 | |||
859 | static inline void slab_free_hook(struct kmem_cache *s, void *x) | ||
860 | { | ||
861 | kmemleak_free_recursive(x, s->flags); | ||
862 | |||
863 | /* | ||
864 | * Trouble is that we may no longer disable interrupts in the fast path. | ||
865 | * So in order to make the debug calls that expect irqs to be | ||
866 | * disabled we need to disable interrupts temporarily. | ||
867 | */ | ||
868 | #if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) | ||
869 | { | ||
870 | unsigned long flags; | ||
871 | |||
872 | local_irq_save(flags); | ||
873 | kmemcheck_slab_free(s, x, s->objsize); | ||
874 | debug_check_no_locks_freed(x, s->objsize); | ||
875 | local_irq_restore(flags); | ||
876 | } | ||
877 | #endif | ||
878 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) | ||
879 | debug_check_no_obj_freed(x, s->objsize); | ||
880 | } | ||
881 | |||
882 | /* | ||
795 | * Tracking of fully allocated slabs for debugging purposes. | 883 | * Tracking of fully allocated slabs for debugging purposes. |
796 | */ | 884 | */ |
797 | static void add_full(struct kmem_cache_node *n, struct page *page) | 885 | static void add_full(struct kmem_cache_node *n, struct page *page) |
@@ -838,7 +926,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) | |||
838 | * dilemma by deferring the increment of the count during | 926 | * dilemma by deferring the increment of the count during |
839 | * bootstrap (see early_kmem_cache_node_alloc). | 927 | * bootstrap (see early_kmem_cache_node_alloc). |
840 | */ | 928 | */ |
841 | if (!NUMA_BUILD || n) { | 929 | if (n) { |
842 | atomic_long_inc(&n->nr_slabs); | 930 | atomic_long_inc(&n->nr_slabs); |
843 | atomic_long_add(objects, &n->total_objects); | 931 | atomic_long_add(objects, &n->total_objects); |
844 | } | 932 | } |
@@ -858,11 +946,11 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page, | |||
858 | if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))) | 946 | if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))) |
859 | return; | 947 | return; |
860 | 948 | ||
861 | init_object(s, object, 0); | 949 | init_object(s, object, SLUB_RED_INACTIVE); |
862 | init_tracking(s, object); | 950 | init_tracking(s, object); |
863 | } | 951 | } |
864 | 952 | ||
865 | static int alloc_debug_processing(struct kmem_cache *s, struct page *page, | 953 | static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page, |
866 | void *object, unsigned long addr) | 954 | void *object, unsigned long addr) |
867 | { | 955 | { |
868 | if (!check_slab(s, page)) | 956 | if (!check_slab(s, page)) |
@@ -878,14 +966,14 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page, | |||
878 | goto bad; | 966 | goto bad; |
879 | } | 967 | } |
880 | 968 | ||
881 | if (!check_object(s, page, object, 0)) | 969 | if (!check_object(s, page, object, SLUB_RED_INACTIVE)) |
882 | goto bad; | 970 | goto bad; |
883 | 971 | ||
884 | /* Success perform special debug activities for allocs */ | 972 | /* Success perform special debug activities for allocs */ |
885 | if (s->flags & SLAB_STORE_USER) | 973 | if (s->flags & SLAB_STORE_USER) |
886 | set_track(s, object, TRACK_ALLOC, addr); | 974 | set_track(s, object, TRACK_ALLOC, addr); |
887 | trace(s, page, object, 1); | 975 | trace(s, page, object, 1); |
888 | init_object(s, object, 1); | 976 | init_object(s, object, SLUB_RED_ACTIVE); |
889 | return 1; | 977 | return 1; |
890 | 978 | ||
891 | bad: | 979 | bad: |
@@ -902,8 +990,8 @@ bad: | |||
902 | return 0; | 990 | return 0; |
903 | } | 991 | } |
904 | 992 | ||
905 | static int free_debug_processing(struct kmem_cache *s, struct page *page, | 993 | static noinline int free_debug_processing(struct kmem_cache *s, |
906 | void *object, unsigned long addr) | 994 | struct page *page, void *object, unsigned long addr) |
907 | { | 995 | { |
908 | if (!check_slab(s, page)) | 996 | if (!check_slab(s, page)) |
909 | goto fail; | 997 | goto fail; |
@@ -918,7 +1006,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, | |||
918 | goto fail; | 1006 | goto fail; |
919 | } | 1007 | } |
920 | 1008 | ||
921 | if (!check_object(s, page, object, 1)) | 1009 | if (!check_object(s, page, object, SLUB_RED_ACTIVE)) |
922 | return 0; | 1010 | return 0; |
923 | 1011 | ||
924 | if (unlikely(s != page->slab)) { | 1012 | if (unlikely(s != page->slab)) { |
@@ -942,7 +1030,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, | |||
942 | if (s->flags & SLAB_STORE_USER) | 1030 | if (s->flags & SLAB_STORE_USER) |
943 | set_track(s, object, TRACK_FREE, addr); | 1031 | set_track(s, object, TRACK_FREE, addr); |
944 | trace(s, page, object, 0); | 1032 | trace(s, page, object, 0); |
945 | init_object(s, object, 0); | 1033 | init_object(s, object, SLUB_RED_INACTIVE); |
946 | return 1; | 1034 | return 1; |
947 | 1035 | ||
948 | fail: | 1036 | fail: |
@@ -1046,7 +1134,7 @@ static inline int free_debug_processing(struct kmem_cache *s, | |||
1046 | static inline int slab_pad_check(struct kmem_cache *s, struct page *page) | 1134 | static inline int slab_pad_check(struct kmem_cache *s, struct page *page) |
1047 | { return 1; } | 1135 | { return 1; } |
1048 | static inline int check_object(struct kmem_cache *s, struct page *page, | 1136 | static inline int check_object(struct kmem_cache *s, struct page *page, |
1049 | void *object, int active) { return 1; } | 1137 | void *object, u8 val) { return 1; } |
1050 | static inline void add_full(struct kmem_cache_node *n, struct page *page) {} | 1138 | static inline void add_full(struct kmem_cache_node *n, struct page *page) {} |
1051 | static inline unsigned long kmem_cache_flags(unsigned long objsize, | 1139 | static inline unsigned long kmem_cache_flags(unsigned long objsize, |
1052 | unsigned long flags, const char *name, | 1140 | unsigned long flags, const char *name, |
@@ -1066,7 +1154,16 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, | |||
1066 | int objects) {} | 1154 | int objects) {} |
1067 | static inline void dec_slabs_node(struct kmem_cache *s, int node, | 1155 | static inline void dec_slabs_node(struct kmem_cache *s, int node, |
1068 | int objects) {} | 1156 | int objects) {} |
1069 | #endif | 1157 | |
1158 | static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) | ||
1159 | { return 0; } | ||
1160 | |||
1161 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | ||
1162 | void *object) {} | ||
1163 | |||
1164 | static inline void slab_free_hook(struct kmem_cache *s, void *x) {} | ||
1165 | |||
1166 | #endif /* CONFIG_SLUB_DEBUG */ | ||
1070 | 1167 | ||
1071 | /* | 1168 | /* |
1072 | * Slab allocation and freeing | 1169 | * Slab allocation and freeing |
@@ -1194,7 +1291,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1194 | slab_pad_check(s, page); | 1291 | slab_pad_check(s, page); |
1195 | for_each_object(p, s, page_address(page), | 1292 | for_each_object(p, s, page_address(page), |
1196 | page->objects) | 1293 | page->objects) |
1197 | check_object(s, page, p, 0); | 1294 | check_object(s, page, p, SLUB_RED_INACTIVE); |
1198 | } | 1295 | } |
1199 | 1296 | ||
1200 | kmemcheck_free_shadow(page, compound_order(page)); | 1297 | kmemcheck_free_shadow(page, compound_order(page)); |
@@ -1211,21 +1308,38 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1211 | __free_pages(page, order); | 1308 | __free_pages(page, order); |
1212 | } | 1309 | } |
1213 | 1310 | ||
1311 | #define need_reserve_slab_rcu \ | ||
1312 | (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) | ||
1313 | |||
1214 | static void rcu_free_slab(struct rcu_head *h) | 1314 | static void rcu_free_slab(struct rcu_head *h) |
1215 | { | 1315 | { |
1216 | struct page *page; | 1316 | struct page *page; |
1217 | 1317 | ||
1218 | page = container_of((struct list_head *)h, struct page, lru); | 1318 | if (need_reserve_slab_rcu) |
1319 | page = virt_to_head_page(h); | ||
1320 | else | ||
1321 | page = container_of((struct list_head *)h, struct page, lru); | ||
1322 | |||
1219 | __free_slab(page->slab, page); | 1323 | __free_slab(page->slab, page); |
1220 | } | 1324 | } |
1221 | 1325 | ||
1222 | static void free_slab(struct kmem_cache *s, struct page *page) | 1326 | static void free_slab(struct kmem_cache *s, struct page *page) |
1223 | { | 1327 | { |
1224 | if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { | 1328 | if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { |
1225 | /* | 1329 | struct rcu_head *head; |
1226 | * RCU free overloads the RCU head over the LRU | 1330 | |
1227 | */ | 1331 | if (need_reserve_slab_rcu) { |
1228 | struct rcu_head *head = (void *)&page->lru; | 1332 | int order = compound_order(page); |
1333 | int offset = (PAGE_SIZE << order) - s->reserved; | ||
1334 | |||
1335 | VM_BUG_ON(s->reserved != sizeof(*head)); | ||
1336 | head = page_address(page) + offset; | ||
1337 | } else { | ||
1338 | /* | ||
1339 | * RCU free overloads the RCU head over the LRU | ||
1340 | */ | ||
1341 | head = (void *)&page->lru; | ||
1342 | } | ||
1229 | 1343 | ||
1230 | call_rcu(head, rcu_free_slab); | 1344 | call_rcu(head, rcu_free_slab); |
1231 | } else | 1345 | } else |
@@ -1274,13 +1388,19 @@ static void add_partial(struct kmem_cache_node *n, | |||
1274 | spin_unlock(&n->list_lock); | 1388 | spin_unlock(&n->list_lock); |
1275 | } | 1389 | } |
1276 | 1390 | ||
1391 | static inline void __remove_partial(struct kmem_cache_node *n, | ||
1392 | struct page *page) | ||
1393 | { | ||
1394 | list_del(&page->lru); | ||
1395 | n->nr_partial--; | ||
1396 | } | ||
1397 | |||
1277 | static void remove_partial(struct kmem_cache *s, struct page *page) | 1398 | static void remove_partial(struct kmem_cache *s, struct page *page) |
1278 | { | 1399 | { |
1279 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | 1400 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); |
1280 | 1401 | ||
1281 | spin_lock(&n->list_lock); | 1402 | spin_lock(&n->list_lock); |
1282 | list_del(&page->lru); | 1403 | __remove_partial(n, page); |
1283 | n->nr_partial--; | ||
1284 | spin_unlock(&n->list_lock); | 1404 | spin_unlock(&n->list_lock); |
1285 | } | 1405 | } |
1286 | 1406 | ||
@@ -1293,8 +1413,7 @@ static inline int lock_and_freeze_slab(struct kmem_cache_node *n, | |||
1293 | struct page *page) | 1413 | struct page *page) |
1294 | { | 1414 | { |
1295 | if (slab_trylock(page)) { | 1415 | if (slab_trylock(page)) { |
1296 | list_del(&page->lru); | 1416 | __remove_partial(n, page); |
1297 | n->nr_partial--; | ||
1298 | __SetPageSlubFrozen(page); | 1417 | __SetPageSlubFrozen(page); |
1299 | return 1; | 1418 | return 1; |
1300 | } | 1419 | } |
@@ -1391,7 +1510,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) | |||
1391 | int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; | 1510 | int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; |
1392 | 1511 | ||
1393 | page = get_partial_node(get_node(s, searchnode)); | 1512 | page = get_partial_node(get_node(s, searchnode)); |
1394 | if (page || node != -1) | 1513 | if (page || node != NUMA_NO_NODE) |
1395 | return page; | 1514 | return page; |
1396 | 1515 | ||
1397 | return get_any_partial(s, flags); | 1516 | return get_any_partial(s, flags); |
@@ -1405,6 +1524,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) | |||
1405 | * On exit the slab lock will have been dropped. | 1524 | * On exit the slab lock will have been dropped. |
1406 | */ | 1525 | */ |
1407 | static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | 1526 | static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) |
1527 | __releases(bitlock) | ||
1408 | { | 1528 | { |
1409 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | 1529 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); |
1410 | 1530 | ||
@@ -1443,10 +1563,77 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
1443 | } | 1563 | } |
1444 | } | 1564 | } |
1445 | 1565 | ||
1566 | #ifdef CONFIG_PREEMPT | ||
1567 | /* | ||
1568 | * Calculate the next globally unique transaction for disambiguation | ||
1569 | * during cmpxchg. The transactions start with the cpu number and are then | ||
1570 | * incremented by CONFIG_NR_CPUS. | ||
1571 | */ | ||
1572 | #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS) | ||
1573 | #else | ||
1574 | /* | ||
1575 | * No preemption supported therefore also no need to check for | ||
1576 | * different cpus. | ||
1577 | */ | ||
1578 | #define TID_STEP 1 | ||
1579 | #endif | ||
1580 | |||
1581 | static inline unsigned long next_tid(unsigned long tid) | ||
1582 | { | ||
1583 | return tid + TID_STEP; | ||
1584 | } | ||
1585 | |||
1586 | static inline unsigned int tid_to_cpu(unsigned long tid) | ||
1587 | { | ||
1588 | return tid % TID_STEP; | ||
1589 | } | ||
1590 | |||
1591 | static inline unsigned long tid_to_event(unsigned long tid) | ||
1592 | { | ||
1593 | return tid / TID_STEP; | ||
1594 | } | ||
1595 | |||
1596 | static inline unsigned int init_tid(int cpu) | ||
1597 | { | ||
1598 | return cpu; | ||
1599 | } | ||
1600 | |||
1601 | static inline void note_cmpxchg_failure(const char *n, | ||
1602 | const struct kmem_cache *s, unsigned long tid) | ||
1603 | { | ||
1604 | #ifdef SLUB_DEBUG_CMPXCHG | ||
1605 | unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid); | ||
1606 | |||
1607 | printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name); | ||
1608 | |||
1609 | #ifdef CONFIG_PREEMPT | ||
1610 | if (tid_to_cpu(tid) != tid_to_cpu(actual_tid)) | ||
1611 | printk("due to cpu change %d -> %d\n", | ||
1612 | tid_to_cpu(tid), tid_to_cpu(actual_tid)); | ||
1613 | else | ||
1614 | #endif | ||
1615 | if (tid_to_event(tid) != tid_to_event(actual_tid)) | ||
1616 | printk("due to cpu running other code. Event %ld->%ld\n", | ||
1617 | tid_to_event(tid), tid_to_event(actual_tid)); | ||
1618 | else | ||
1619 | printk("for unknown reason: actual=%lx was=%lx target=%lx\n", | ||
1620 | actual_tid, tid, next_tid(tid)); | ||
1621 | #endif | ||
1622 | stat(s, CMPXCHG_DOUBLE_CPU_FAIL); | ||
1623 | } | ||
1624 | |||
1625 | void init_kmem_cache_cpus(struct kmem_cache *s) | ||
1626 | { | ||
1627 | int cpu; | ||
1628 | |||
1629 | for_each_possible_cpu(cpu) | ||
1630 | per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); | ||
1631 | } | ||
1446 | /* | 1632 | /* |
1447 | * Remove the cpu slab | 1633 | * Remove the cpu slab |
1448 | */ | 1634 | */ |
1449 | static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | 1635 | static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
1636 | __releases(bitlock) | ||
1450 | { | 1637 | { |
1451 | struct page *page = c->page; | 1638 | struct page *page = c->page; |
1452 | int tail = 1; | 1639 | int tail = 1; |
@@ -1473,6 +1660,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1473 | page->inuse--; | 1660 | page->inuse--; |
1474 | } | 1661 | } |
1475 | c->page = NULL; | 1662 | c->page = NULL; |
1663 | c->tid = next_tid(c->tid); | ||
1476 | unfreeze_slab(s, page, tail); | 1664 | unfreeze_slab(s, page, tail); |
1477 | } | 1665 | } |
1478 | 1666 | ||
@@ -1606,33 +1794,46 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |||
1606 | unsigned long addr, struct kmem_cache_cpu *c) | 1794 | unsigned long addr, struct kmem_cache_cpu *c) |
1607 | { | 1795 | { |
1608 | void **object; | 1796 | void **object; |
1609 | struct page *new; | 1797 | struct page *page; |
1798 | unsigned long flags; | ||
1799 | |||
1800 | local_irq_save(flags); | ||
1801 | #ifdef CONFIG_PREEMPT | ||
1802 | /* | ||
1803 | * We may have been preempted and rescheduled on a different | ||
1804 | * cpu before disabling interrupts. Need to reload cpu area | ||
1805 | * pointer. | ||
1806 | */ | ||
1807 | c = this_cpu_ptr(s->cpu_slab); | ||
1808 | #endif | ||
1610 | 1809 | ||
1611 | /* We handle __GFP_ZERO in the caller */ | 1810 | /* We handle __GFP_ZERO in the caller */ |
1612 | gfpflags &= ~__GFP_ZERO; | 1811 | gfpflags &= ~__GFP_ZERO; |
1613 | 1812 | ||
1614 | if (!c->page) | 1813 | page = c->page; |
1814 | if (!page) | ||
1615 | goto new_slab; | 1815 | goto new_slab; |
1616 | 1816 | ||
1617 | slab_lock(c->page); | 1817 | slab_lock(page); |
1618 | if (unlikely(!node_match(c, node))) | 1818 | if (unlikely(!node_match(c, node))) |
1619 | goto another_slab; | 1819 | goto another_slab; |
1620 | 1820 | ||
1621 | stat(s, ALLOC_REFILL); | 1821 | stat(s, ALLOC_REFILL); |
1622 | 1822 | ||
1623 | load_freelist: | 1823 | load_freelist: |
1624 | object = c->page->freelist; | 1824 | object = page->freelist; |
1625 | if (unlikely(!object)) | 1825 | if (unlikely(!object)) |
1626 | goto another_slab; | 1826 | goto another_slab; |
1627 | if (kmem_cache_debug(s)) | 1827 | if (kmem_cache_debug(s)) |
1628 | goto debug; | 1828 | goto debug; |
1629 | 1829 | ||
1630 | c->freelist = get_freepointer(s, object); | 1830 | c->freelist = get_freepointer(s, object); |
1631 | c->page->inuse = c->page->objects; | 1831 | page->inuse = page->objects; |
1632 | c->page->freelist = NULL; | 1832 | page->freelist = NULL; |
1633 | c->node = page_to_nid(c->page); | 1833 | |
1634 | unlock_out: | 1834 | slab_unlock(page); |
1635 | slab_unlock(c->page); | 1835 | c->tid = next_tid(c->tid); |
1836 | local_irq_restore(flags); | ||
1636 | stat(s, ALLOC_SLOWPATH); | 1837 | stat(s, ALLOC_SLOWPATH); |
1637 | return object; | 1838 | return object; |
1638 | 1839 | ||
@@ -1640,42 +1841,50 @@ another_slab: | |||
1640 | deactivate_slab(s, c); | 1841 | deactivate_slab(s, c); |
1641 | 1842 | ||
1642 | new_slab: | 1843 | new_slab: |
1643 | new = get_partial(s, gfpflags, node); | 1844 | page = get_partial(s, gfpflags, node); |
1644 | if (new) { | 1845 | if (page) { |
1645 | c->page = new; | ||
1646 | stat(s, ALLOC_FROM_PARTIAL); | 1846 | stat(s, ALLOC_FROM_PARTIAL); |
1847 | c->node = page_to_nid(page); | ||
1848 | c->page = page; | ||
1647 | goto load_freelist; | 1849 | goto load_freelist; |
1648 | } | 1850 | } |
1649 | 1851 | ||
1852 | gfpflags &= gfp_allowed_mask; | ||
1650 | if (gfpflags & __GFP_WAIT) | 1853 | if (gfpflags & __GFP_WAIT) |
1651 | local_irq_enable(); | 1854 | local_irq_enable(); |
1652 | 1855 | ||
1653 | new = new_slab(s, gfpflags, node); | 1856 | page = new_slab(s, gfpflags, node); |
1654 | 1857 | ||
1655 | if (gfpflags & __GFP_WAIT) | 1858 | if (gfpflags & __GFP_WAIT) |
1656 | local_irq_disable(); | 1859 | local_irq_disable(); |
1657 | 1860 | ||
1658 | if (new) { | 1861 | if (page) { |
1659 | c = __this_cpu_ptr(s->cpu_slab); | 1862 | c = __this_cpu_ptr(s->cpu_slab); |
1660 | stat(s, ALLOC_SLAB); | 1863 | stat(s, ALLOC_SLAB); |
1661 | if (c->page) | 1864 | if (c->page) |
1662 | flush_slab(s, c); | 1865 | flush_slab(s, c); |
1663 | slab_lock(new); | 1866 | |
1664 | __SetPageSlubFrozen(new); | 1867 | slab_lock(page); |
1665 | c->page = new; | 1868 | __SetPageSlubFrozen(page); |
1869 | c->node = page_to_nid(page); | ||
1870 | c->page = page; | ||
1666 | goto load_freelist; | 1871 | goto load_freelist; |
1667 | } | 1872 | } |
1668 | if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) | 1873 | if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) |
1669 | slab_out_of_memory(s, gfpflags, node); | 1874 | slab_out_of_memory(s, gfpflags, node); |
1875 | local_irq_restore(flags); | ||
1670 | return NULL; | 1876 | return NULL; |
1671 | debug: | 1877 | debug: |
1672 | if (!alloc_debug_processing(s, c->page, object, addr)) | 1878 | if (!alloc_debug_processing(s, page, object, addr)) |
1673 | goto another_slab; | 1879 | goto another_slab; |
1674 | 1880 | ||
1675 | c->page->inuse++; | 1881 | page->inuse++; |
1676 | c->page->freelist = get_freepointer(s, object); | 1882 | page->freelist = get_freepointer(s, object); |
1677 | c->node = -1; | 1883 | deactivate_slab(s, c); |
1678 | goto unlock_out; | 1884 | c->page = NULL; |
1885 | c->node = NUMA_NO_NODE; | ||
1886 | local_irq_restore(flags); | ||
1887 | return object; | ||
1679 | } | 1888 | } |
1680 | 1889 | ||
1681 | /* | 1890 | /* |
@@ -1693,34 +1902,63 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
1693 | { | 1902 | { |
1694 | void **object; | 1903 | void **object; |
1695 | struct kmem_cache_cpu *c; | 1904 | struct kmem_cache_cpu *c; |
1696 | unsigned long flags; | 1905 | unsigned long tid; |
1697 | |||
1698 | gfpflags &= gfp_allowed_mask; | ||
1699 | 1906 | ||
1700 | lockdep_trace_alloc(gfpflags); | 1907 | if (slab_pre_alloc_hook(s, gfpflags)) |
1701 | might_sleep_if(gfpflags & __GFP_WAIT); | ||
1702 | |||
1703 | if (should_failslab(s->objsize, gfpflags, s->flags)) | ||
1704 | return NULL; | 1908 | return NULL; |
1705 | 1909 | ||
1706 | local_irq_save(flags); | 1910 | redo: |
1911 | |||
1912 | /* | ||
1913 | * Must read kmem_cache cpu data via this cpu ptr. Preemption is | ||
1914 | * enabled. We may switch back and forth between cpus while | ||
1915 | * reading from one cpu area. That does not matter as long | ||
1916 | * as we end up on the original cpu again when doing the cmpxchg. | ||
1917 | */ | ||
1707 | c = __this_cpu_ptr(s->cpu_slab); | 1918 | c = __this_cpu_ptr(s->cpu_slab); |
1919 | |||
1920 | /* | ||
1921 | * The transaction ids are globally unique per cpu and per operation on | ||
1922 | * a per cpu queue. Thus they can guarantee that the cmpxchg_double | ||
1923 | * occurs on the right processor and that there was no operation on the | ||
1924 | * linked list in between. | ||
1925 | */ | ||
1926 | tid = c->tid; | ||
1927 | barrier(); | ||
1928 | |||
1708 | object = c->freelist; | 1929 | object = c->freelist; |
1709 | if (unlikely(!object || !node_match(c, node))) | 1930 | if (unlikely(!object || !node_match(c, node))) |
1710 | 1931 | ||
1711 | object = __slab_alloc(s, gfpflags, node, addr, c); | 1932 | object = __slab_alloc(s, gfpflags, node, addr, c); |
1712 | 1933 | ||
1713 | else { | 1934 | else { |
1714 | c->freelist = get_freepointer(s, object); | 1935 | /* |
1936 | * The cmpxchg will only match if there was no additional | ||
1937 | * operation and if we are on the right processor. | ||
1938 | * | ||
1939 | * The cmpxchg does the following atomically (without lock semantics!) | ||
1940 | * 1. Relocate first pointer to the current per cpu area. | ||
1941 | * 2. Verify that tid and freelist have not been changed | ||
1942 | * 3. If they were not changed replace tid and freelist | ||
1943 | * | ||
1944 | * Since this is without lock semantics the protection is only against | ||
1945 | * code executing on this cpu *not* from access by other cpus. | ||
1946 | */ | ||
1947 | if (unlikely(!irqsafe_cpu_cmpxchg_double( | ||
1948 | s->cpu_slab->freelist, s->cpu_slab->tid, | ||
1949 | object, tid, | ||
1950 | get_freepointer_safe(s, object), next_tid(tid)))) { | ||
1951 | |||
1952 | note_cmpxchg_failure("slab_alloc", s, tid); | ||
1953 | goto redo; | ||
1954 | } | ||
1715 | stat(s, ALLOC_FASTPATH); | 1955 | stat(s, ALLOC_FASTPATH); |
1716 | } | 1956 | } |
1717 | local_irq_restore(flags); | ||
1718 | 1957 | ||
1719 | if (unlikely(gfpflags & __GFP_ZERO) && object) | 1958 | if (unlikely(gfpflags & __GFP_ZERO) && object) |
1720 | memset(object, 0, s->objsize); | 1959 | memset(object, 0, s->objsize); |
1721 | 1960 | ||
1722 | kmemcheck_slab_alloc(s, gfpflags, object, s->objsize); | 1961 | slab_post_alloc_hook(s, gfpflags, object); |
1723 | kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags); | ||
1724 | 1962 | ||
1725 | return object; | 1963 | return object; |
1726 | } | 1964 | } |
@@ -1736,11 +1974,21 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) | |||
1736 | EXPORT_SYMBOL(kmem_cache_alloc); | 1974 | EXPORT_SYMBOL(kmem_cache_alloc); |
1737 | 1975 | ||
1738 | #ifdef CONFIG_TRACING | 1976 | #ifdef CONFIG_TRACING |
1739 | void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) | 1977 | void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) |
1740 | { | 1978 | { |
1741 | return slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); | 1979 | void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); |
1980 | trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); | ||
1981 | return ret; | ||
1982 | } | ||
1983 | EXPORT_SYMBOL(kmem_cache_alloc_trace); | ||
1984 | |||
1985 | void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) | ||
1986 | { | ||
1987 | void *ret = kmalloc_order(size, flags, order); | ||
1988 | trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); | ||
1989 | return ret; | ||
1742 | } | 1990 | } |
1743 | EXPORT_SYMBOL(kmem_cache_alloc_notrace); | 1991 | EXPORT_SYMBOL(kmalloc_order_trace); |
1744 | #endif | 1992 | #endif |
1745 | 1993 | ||
1746 | #ifdef CONFIG_NUMA | 1994 | #ifdef CONFIG_NUMA |
@@ -1754,16 +2002,20 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) | |||
1754 | return ret; | 2002 | return ret; |
1755 | } | 2003 | } |
1756 | EXPORT_SYMBOL(kmem_cache_alloc_node); | 2004 | EXPORT_SYMBOL(kmem_cache_alloc_node); |
1757 | #endif | ||
1758 | 2005 | ||
1759 | #ifdef CONFIG_TRACING | 2006 | #ifdef CONFIG_TRACING |
1760 | void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, | 2007 | void *kmem_cache_alloc_node_trace(struct kmem_cache *s, |
1761 | gfp_t gfpflags, | 2008 | gfp_t gfpflags, |
1762 | int node) | 2009 | int node, size_t size) |
1763 | { | 2010 | { |
1764 | return slab_alloc(s, gfpflags, node, _RET_IP_); | 2011 | void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); |
2012 | |||
2013 | trace_kmalloc_node(_RET_IP_, ret, | ||
2014 | size, s->size, gfpflags, node); | ||
2015 | return ret; | ||
1765 | } | 2016 | } |
1766 | EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); | 2017 | EXPORT_SYMBOL(kmem_cache_alloc_node_trace); |
2018 | #endif | ||
1767 | #endif | 2019 | #endif |
1768 | 2020 | ||
1769 | /* | 2021 | /* |
@@ -1779,14 +2031,15 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
1779 | { | 2031 | { |
1780 | void *prior; | 2032 | void *prior; |
1781 | void **object = (void *)x; | 2033 | void **object = (void *)x; |
2034 | unsigned long flags; | ||
1782 | 2035 | ||
1783 | stat(s, FREE_SLOWPATH); | 2036 | local_irq_save(flags); |
1784 | slab_lock(page); | 2037 | slab_lock(page); |
2038 | stat(s, FREE_SLOWPATH); | ||
1785 | 2039 | ||
1786 | if (kmem_cache_debug(s)) | 2040 | if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr)) |
1787 | goto debug; | 2041 | goto out_unlock; |
1788 | 2042 | ||
1789 | checks_ok: | ||
1790 | prior = page->freelist; | 2043 | prior = page->freelist; |
1791 | set_freepointer(s, object, prior); | 2044 | set_freepointer(s, object, prior); |
1792 | page->freelist = object; | 2045 | page->freelist = object; |
@@ -1811,6 +2064,7 @@ checks_ok: | |||
1811 | 2064 | ||
1812 | out_unlock: | 2065 | out_unlock: |
1813 | slab_unlock(page); | 2066 | slab_unlock(page); |
2067 | local_irq_restore(flags); | ||
1814 | return; | 2068 | return; |
1815 | 2069 | ||
1816 | slab_empty: | 2070 | slab_empty: |
@@ -1822,14 +2076,9 @@ slab_empty: | |||
1822 | stat(s, FREE_REMOVE_PARTIAL); | 2076 | stat(s, FREE_REMOVE_PARTIAL); |
1823 | } | 2077 | } |
1824 | slab_unlock(page); | 2078 | slab_unlock(page); |
2079 | local_irq_restore(flags); | ||
1825 | stat(s, FREE_SLAB); | 2080 | stat(s, FREE_SLAB); |
1826 | discard_slab(s, page); | 2081 | discard_slab(s, page); |
1827 | return; | ||
1828 | |||
1829 | debug: | ||
1830 | if (!free_debug_processing(s, page, x, addr)) | ||
1831 | goto out_unlock; | ||
1832 | goto checks_ok; | ||
1833 | } | 2082 | } |
1834 | 2083 | ||
1835 | /* | 2084 | /* |
@@ -1848,23 +2097,38 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
1848 | { | 2097 | { |
1849 | void **object = (void *)x; | 2098 | void **object = (void *)x; |
1850 | struct kmem_cache_cpu *c; | 2099 | struct kmem_cache_cpu *c; |
1851 | unsigned long flags; | 2100 | unsigned long tid; |
1852 | 2101 | ||
1853 | kmemleak_free_recursive(x, s->flags); | 2102 | slab_free_hook(s, x); |
1854 | local_irq_save(flags); | 2103 | |
2104 | redo: | ||
2105 | |||
2106 | /* | ||
2107 | * Determine the current cpu's per cpu slab. | ||
2108 | * The cpu may change afterward. However that does not matter since | ||
2109 | * data is retrieved via this pointer. If we are on the same cpu | ||
2110 | * during the cmpxchg then the free will succeed. | ||
2111 | */ | ||
1855 | c = __this_cpu_ptr(s->cpu_slab); | 2112 | c = __this_cpu_ptr(s->cpu_slab); |
1856 | kmemcheck_slab_free(s, object, s->objsize); | 2113 | |
1857 | debug_check_no_locks_freed(object, s->objsize); | 2114 | tid = c->tid; |
1858 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) | 2115 | barrier(); |
1859 | debug_check_no_obj_freed(object, s->objsize); | 2116 | |
1860 | if (likely(page == c->page && c->node >= 0)) { | 2117 | if (likely(page == c->page)) { |
1861 | set_freepointer(s, object, c->freelist); | 2118 | set_freepointer(s, object, c->freelist); |
1862 | c->freelist = object; | 2119 | |
2120 | if (unlikely(!irqsafe_cpu_cmpxchg_double( | ||
2121 | s->cpu_slab->freelist, s->cpu_slab->tid, | ||
2122 | c->freelist, tid, | ||
2123 | object, next_tid(tid)))) { | ||
2124 | |||
2125 | note_cmpxchg_failure("slab_free", s, tid); | ||
2126 | goto redo; | ||
2127 | } | ||
1863 | stat(s, FREE_FASTPATH); | 2128 | stat(s, FREE_FASTPATH); |
1864 | } else | 2129 | } else |
1865 | __slab_free(s, page, x, addr); | 2130 | __slab_free(s, page, x, addr); |
1866 | 2131 | ||
1867 | local_irq_restore(flags); | ||
1868 | } | 2132 | } |
1869 | 2133 | ||
1870 | void kmem_cache_free(struct kmem_cache *s, void *x) | 2134 | void kmem_cache_free(struct kmem_cache *s, void *x) |
@@ -1879,17 +2143,6 @@ void kmem_cache_free(struct kmem_cache *s, void *x) | |||
1879 | } | 2143 | } |
1880 | EXPORT_SYMBOL(kmem_cache_free); | 2144 | EXPORT_SYMBOL(kmem_cache_free); |
1881 | 2145 | ||
1882 | /* Figure out on which slab page the object resides */ | ||
1883 | static struct page *get_object_page(const void *x) | ||
1884 | { | ||
1885 | struct page *page = virt_to_head_page(x); | ||
1886 | |||
1887 | if (!PageSlab(page)) | ||
1888 | return NULL; | ||
1889 | |||
1890 | return page; | ||
1891 | } | ||
1892 | |||
1893 | /* | 2146 | /* |
1894 | * Object placement in a slab is made very easy because we always start at | 2147 | * Object placement in a slab is made very easy because we always start at |
1895 | * offset 0. If we tune the size of the object to the alignment then we can | 2148 | * offset 0. If we tune the size of the object to the alignment then we can |
@@ -1945,13 +2198,13 @@ static int slub_nomerge; | |||
1945 | * the smallest order which will fit the object. | 2198 | * the smallest order which will fit the object. |
1946 | */ | 2199 | */ |
1947 | static inline int slab_order(int size, int min_objects, | 2200 | static inline int slab_order(int size, int min_objects, |
1948 | int max_order, int fract_leftover) | 2201 | int max_order, int fract_leftover, int reserved) |
1949 | { | 2202 | { |
1950 | int order; | 2203 | int order; |
1951 | int rem; | 2204 | int rem; |
1952 | int min_order = slub_min_order; | 2205 | int min_order = slub_min_order; |
1953 | 2206 | ||
1954 | if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE) | 2207 | if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE) |
1955 | return get_order(size * MAX_OBJS_PER_PAGE) - 1; | 2208 | return get_order(size * MAX_OBJS_PER_PAGE) - 1; |
1956 | 2209 | ||
1957 | for (order = max(min_order, | 2210 | for (order = max(min_order, |
@@ -1960,10 +2213,10 @@ static inline int slab_order(int size, int min_objects, | |||
1960 | 2213 | ||
1961 | unsigned long slab_size = PAGE_SIZE << order; | 2214 | unsigned long slab_size = PAGE_SIZE << order; |
1962 | 2215 | ||
1963 | if (slab_size < min_objects * size) | 2216 | if (slab_size < min_objects * size + reserved) |
1964 | continue; | 2217 | continue; |
1965 | 2218 | ||
1966 | rem = slab_size % size; | 2219 | rem = (slab_size - reserved) % size; |
1967 | 2220 | ||
1968 | if (rem <= slab_size / fract_leftover) | 2221 | if (rem <= slab_size / fract_leftover) |
1969 | break; | 2222 | break; |
@@ -1973,7 +2226,7 @@ static inline int slab_order(int size, int min_objects, | |||
1973 | return order; | 2226 | return order; |
1974 | } | 2227 | } |
1975 | 2228 | ||
1976 | static inline int calculate_order(int size) | 2229 | static inline int calculate_order(int size, int reserved) |
1977 | { | 2230 | { |
1978 | int order; | 2231 | int order; |
1979 | int min_objects; | 2232 | int min_objects; |
@@ -1991,14 +2244,14 @@ static inline int calculate_order(int size) | |||
1991 | min_objects = slub_min_objects; | 2244 | min_objects = slub_min_objects; |
1992 | if (!min_objects) | 2245 | if (!min_objects) |
1993 | min_objects = 4 * (fls(nr_cpu_ids) + 1); | 2246 | min_objects = 4 * (fls(nr_cpu_ids) + 1); |
1994 | max_objects = (PAGE_SIZE << slub_max_order)/size; | 2247 | max_objects = order_objects(slub_max_order, size, reserved); |
1995 | min_objects = min(min_objects, max_objects); | 2248 | min_objects = min(min_objects, max_objects); |
1996 | 2249 | ||
1997 | while (min_objects > 1) { | 2250 | while (min_objects > 1) { |
1998 | fraction = 16; | 2251 | fraction = 16; |
1999 | while (fraction >= 4) { | 2252 | while (fraction >= 4) { |
2000 | order = slab_order(size, min_objects, | 2253 | order = slab_order(size, min_objects, |
2001 | slub_max_order, fraction); | 2254 | slub_max_order, fraction, reserved); |
2002 | if (order <= slub_max_order) | 2255 | if (order <= slub_max_order) |
2003 | return order; | 2256 | return order; |
2004 | fraction /= 2; | 2257 | fraction /= 2; |
@@ -2010,14 +2263,14 @@ static inline int calculate_order(int size) | |||
2010 | * We were unable to place multiple objects in a slab. Now | 2263 | * We were unable to place multiple objects in a slab. Now |
2011 | * lets see if we can place a single object there. | 2264 | * lets see if we can place a single object there. |
2012 | */ | 2265 | */ |
2013 | order = slab_order(size, 1, slub_max_order, 1); | 2266 | order = slab_order(size, 1, slub_max_order, 1, reserved); |
2014 | if (order <= slub_max_order) | 2267 | if (order <= slub_max_order) |
2015 | return order; | 2268 | return order; |
2016 | 2269 | ||
2017 | /* | 2270 | /* |
2018 | * Doh this slab cannot be placed using slub_max_order. | 2271 | * Doh this slab cannot be placed using slub_max_order. |
2019 | */ | 2272 | */ |
2020 | order = slab_order(size, 1, MAX_ORDER, 1); | 2273 | order = slab_order(size, 1, MAX_ORDER, 1, reserved); |
2021 | if (order < MAX_ORDER) | 2274 | if (order < MAX_ORDER) |
2022 | return order; | 2275 | return order; |
2023 | return -ENOSYS; | 2276 | return -ENOSYS; |
@@ -2062,26 +2315,28 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) | |||
2062 | #endif | 2315 | #endif |
2063 | } | 2316 | } |
2064 | 2317 | ||
2065 | static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]); | 2318 | static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) |
2066 | |||
2067 | static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) | ||
2068 | { | 2319 | { |
2069 | if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) | 2320 | BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < |
2070 | /* | 2321 | SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); |
2071 | * Boot time creation of the kmalloc array. Use static per cpu data | 2322 | |
2072 | * since the per cpu allocator is not available yet. | 2323 | /* |
2073 | */ | 2324 | * Must align to double word boundary for the double cmpxchg |
2074 | s->cpu_slab = kmalloc_percpu + (s - kmalloc_caches); | 2325 | * instructions to work; see __pcpu_double_call_return_bool(). |
2075 | else | 2326 | */ |
2076 | s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); | 2327 | s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), |
2328 | 2 * sizeof(void *)); | ||
2077 | 2329 | ||
2078 | if (!s->cpu_slab) | 2330 | if (!s->cpu_slab) |
2079 | return 0; | 2331 | return 0; |
2080 | 2332 | ||
2333 | init_kmem_cache_cpus(s); | ||
2334 | |||
2081 | return 1; | 2335 | return 1; |
2082 | } | 2336 | } |
2083 | 2337 | ||
2084 | #ifdef CONFIG_NUMA | 2338 | static struct kmem_cache *kmem_cache_node; |
2339 | |||
2085 | /* | 2340 | /* |
2086 | * No kmalloc_node yet so do it by hand. We know that this is the first | 2341 | * No kmalloc_node yet so do it by hand. We know that this is the first |
2087 | * slab on the node for this slabcache. There are no concurrent accesses | 2342 | * slab on the node for this slabcache. There are no concurrent accesses |
@@ -2091,15 +2346,15 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) | |||
2091 | * when allocating for the kmalloc_node_cache. This is used for bootstrapping | 2346 | * when allocating for the kmalloc_node_cache. This is used for bootstrapping |
2092 | * memory on a fresh node that has no slab structures yet. | 2347 | * memory on a fresh node that has no slab structures yet. |
2093 | */ | 2348 | */ |
2094 | static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node) | 2349 | static void early_kmem_cache_node_alloc(int node) |
2095 | { | 2350 | { |
2096 | struct page *page; | 2351 | struct page *page; |
2097 | struct kmem_cache_node *n; | 2352 | struct kmem_cache_node *n; |
2098 | unsigned long flags; | 2353 | unsigned long flags; |
2099 | 2354 | ||
2100 | BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node)); | 2355 | BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); |
2101 | 2356 | ||
2102 | page = new_slab(kmalloc_caches, gfpflags, node); | 2357 | page = new_slab(kmem_cache_node, GFP_NOWAIT, node); |
2103 | 2358 | ||
2104 | BUG_ON(!page); | 2359 | BUG_ON(!page); |
2105 | if (page_to_nid(page) != node) { | 2360 | if (page_to_nid(page) != node) { |
@@ -2111,15 +2366,15 @@ static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node) | |||
2111 | 2366 | ||
2112 | n = page->freelist; | 2367 | n = page->freelist; |
2113 | BUG_ON(!n); | 2368 | BUG_ON(!n); |
2114 | page->freelist = get_freepointer(kmalloc_caches, n); | 2369 | page->freelist = get_freepointer(kmem_cache_node, n); |
2115 | page->inuse++; | 2370 | page->inuse++; |
2116 | kmalloc_caches->node[node] = n; | 2371 | kmem_cache_node->node[node] = n; |
2117 | #ifdef CONFIG_SLUB_DEBUG | 2372 | #ifdef CONFIG_SLUB_DEBUG |
2118 | init_object(kmalloc_caches, n, 1); | 2373 | init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); |
2119 | init_tracking(kmalloc_caches, n); | 2374 | init_tracking(kmem_cache_node, n); |
2120 | #endif | 2375 | #endif |
2121 | init_kmem_cache_node(n, kmalloc_caches); | 2376 | init_kmem_cache_node(n, kmem_cache_node); |
2122 | inc_slabs_node(kmalloc_caches, node, page->objects); | 2377 | inc_slabs_node(kmem_cache_node, node, page->objects); |
2123 | 2378 | ||
2124 | /* | 2379 | /* |
2125 | * lockdep requires consistent irq usage for each lock | 2380 | * lockdep requires consistent irq usage for each lock |
@@ -2137,13 +2392,15 @@ static void free_kmem_cache_nodes(struct kmem_cache *s) | |||
2137 | 2392 | ||
2138 | for_each_node_state(node, N_NORMAL_MEMORY) { | 2393 | for_each_node_state(node, N_NORMAL_MEMORY) { |
2139 | struct kmem_cache_node *n = s->node[node]; | 2394 | struct kmem_cache_node *n = s->node[node]; |
2395 | |||
2140 | if (n) | 2396 | if (n) |
2141 | kmem_cache_free(kmalloc_caches, n); | 2397 | kmem_cache_free(kmem_cache_node, n); |
2398 | |||
2142 | s->node[node] = NULL; | 2399 | s->node[node] = NULL; |
2143 | } | 2400 | } |
2144 | } | 2401 | } |
2145 | 2402 | ||
2146 | static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) | 2403 | static int init_kmem_cache_nodes(struct kmem_cache *s) |
2147 | { | 2404 | { |
2148 | int node; | 2405 | int node; |
2149 | 2406 | ||
@@ -2151,11 +2408,11 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) | |||
2151 | struct kmem_cache_node *n; | 2408 | struct kmem_cache_node *n; |
2152 | 2409 | ||
2153 | if (slab_state == DOWN) { | 2410 | if (slab_state == DOWN) { |
2154 | early_kmem_cache_node_alloc(gfpflags, node); | 2411 | early_kmem_cache_node_alloc(node); |
2155 | continue; | 2412 | continue; |
2156 | } | 2413 | } |
2157 | n = kmem_cache_alloc_node(kmalloc_caches, | 2414 | n = kmem_cache_alloc_node(kmem_cache_node, |
2158 | gfpflags, node); | 2415 | GFP_KERNEL, node); |
2159 | 2416 | ||
2160 | if (!n) { | 2417 | if (!n) { |
2161 | free_kmem_cache_nodes(s); | 2418 | free_kmem_cache_nodes(s); |
@@ -2167,17 +2424,6 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) | |||
2167 | } | 2424 | } |
2168 | return 1; | 2425 | return 1; |
2169 | } | 2426 | } |
2170 | #else | ||
2171 | static void free_kmem_cache_nodes(struct kmem_cache *s) | ||
2172 | { | ||
2173 | } | ||
2174 | |||
2175 | static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) | ||
2176 | { | ||
2177 | init_kmem_cache_node(&s->local_node, s); | ||
2178 | return 1; | ||
2179 | } | ||
2180 | #endif | ||
2181 | 2427 | ||
2182 | static void set_min_partial(struct kmem_cache *s, unsigned long min) | 2428 | static void set_min_partial(struct kmem_cache *s, unsigned long min) |
2183 | { | 2429 | { |
@@ -2285,7 +2531,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
2285 | if (forced_order >= 0) | 2531 | if (forced_order >= 0) |
2286 | order = forced_order; | 2532 | order = forced_order; |
2287 | else | 2533 | else |
2288 | order = calculate_order(size); | 2534 | order = calculate_order(size, s->reserved); |
2289 | 2535 | ||
2290 | if (order < 0) | 2536 | if (order < 0) |
2291 | return 0; | 2537 | return 0; |
@@ -2303,8 +2549,8 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
2303 | /* | 2549 | /* |
2304 | * Determine the number of objects per slab | 2550 | * Determine the number of objects per slab |
2305 | */ | 2551 | */ |
2306 | s->oo = oo_make(order, size); | 2552 | s->oo = oo_make(order, size, s->reserved); |
2307 | s->min = oo_make(get_order(size), size); | 2553 | s->min = oo_make(get_order(size), size, s->reserved); |
2308 | if (oo_objects(s->oo) > oo_objects(s->max)) | 2554 | if (oo_objects(s->oo) > oo_objects(s->max)) |
2309 | s->max = s->oo; | 2555 | s->max = s->oo; |
2310 | 2556 | ||
@@ -2312,7 +2558,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
2312 | 2558 | ||
2313 | } | 2559 | } |
2314 | 2560 | ||
2315 | static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, | 2561 | static int kmem_cache_open(struct kmem_cache *s, |
2316 | const char *name, size_t size, | 2562 | const char *name, size_t size, |
2317 | size_t align, unsigned long flags, | 2563 | size_t align, unsigned long flags, |
2318 | void (*ctor)(void *)) | 2564 | void (*ctor)(void *)) |
@@ -2323,6 +2569,10 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, | |||
2323 | s->objsize = size; | 2569 | s->objsize = size; |
2324 | s->align = align; | 2570 | s->align = align; |
2325 | s->flags = kmem_cache_flags(size, flags, name, ctor); | 2571 | s->flags = kmem_cache_flags(size, flags, name, ctor); |
2572 | s->reserved = 0; | ||
2573 | |||
2574 | if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) | ||
2575 | s->reserved = sizeof(struct rcu_head); | ||
2326 | 2576 | ||
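For SLAB_DESTROY_BY_RCU caches the slab page must survive until an RCU grace period ends, so when the rcu_head cannot be parked elsewhere a few bytes are reserved in the slab itself. A sketch of where that tail slot would live, assuming (as the reserved/objects arithmetic suggests) the reservation sits at the very end of the compound page:

    #include <stddef.h>

    #define PAGE_SIZE 4096UL

    /* Last 'reserved' bytes of a 2^order-page slab: the space the layout code
     * keeps free once s->reserved is set to sizeof(struct rcu_head). */
    static void *reserved_tail(void *slab_base, unsigned int order, size_t reserved)
    {
            return (char *)slab_base + (PAGE_SIZE << order) - reserved;
    }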
2327 | if (!calculate_sizes(s, -1)) | 2577 | if (!calculate_sizes(s, -1)) |
2328 | goto error; | 2578 | goto error; |
@@ -2348,10 +2598,10 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, | |||
2348 | #ifdef CONFIG_NUMA | 2598 | #ifdef CONFIG_NUMA |
2349 | s->remote_node_defrag_ratio = 1000; | 2599 | s->remote_node_defrag_ratio = 1000; |
2350 | #endif | 2600 | #endif |
2351 | if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) | 2601 | if (!init_kmem_cache_nodes(s)) |
2352 | goto error; | 2602 | goto error; |
2353 | 2603 | ||
2354 | if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) | 2604 | if (alloc_kmem_cache_cpus(s)) |
2355 | return 1; | 2605 | return 1; |
2356 | 2606 | ||
2357 | free_kmem_cache_nodes(s); | 2607 | free_kmem_cache_nodes(s); |
@@ -2365,35 +2615,6 @@ error: | |||
2365 | } | 2615 | } |
2366 | 2616 | ||
2367 | /* | 2617 | /* |
2368 | * Check if a given pointer is valid | ||
2369 | */ | ||
2370 | int kmem_ptr_validate(struct kmem_cache *s, const void *object) | ||
2371 | { | ||
2372 | struct page *page; | ||
2373 | |||
2374 | if (!kern_ptr_validate(object, s->size)) | ||
2375 | return 0; | ||
2376 | |||
2377 | page = get_object_page(object); | ||
2378 | |||
2379 | if (!page || s != page->slab) | ||
2380 | /* No slab or wrong slab */ | ||
2381 | return 0; | ||
2382 | |||
2383 | if (!check_valid_pointer(s, page, object)) | ||
2384 | return 0; | ||
2385 | |||
2386 | /* | ||
2387 | * We could also check if the object is on the slabs freelist. | ||
2388 | * But this would be too expensive and it seems that the main | ||
2389 | * purpose of kmem_ptr_valid() is to check if the object belongs | ||
2390 | * to a certain slab. | ||
2391 | */ | ||
2392 | return 1; | ||
2393 | } | ||
2394 | EXPORT_SYMBOL(kmem_ptr_validate); | ||
2395 | |||
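The dropped kmem_ptr_validate() boiled down to "does this pointer sit on an object boundary inside a slab owned by this cache". A userspace model of that boundary check; the slab base and object count stand in for what the kernel read from struct page:

    #include <stdbool.h>
    #include <stdint.h>

    /* True if ptr lies on an object boundary within [base, base + objects*size). */
    static bool on_object_boundary(const void *ptr, const void *base,
                                   unsigned int objects, unsigned int size)
    {
            uintptr_t off = (uintptr_t)ptr - (uintptr_t)base;

            return ptr >= base &&
                   off < (uintptr_t)objects * size &&
                   off % size == 0;
    }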
2396 | /* | ||
2397 | * Determine the size of a slab object | 2618 | * Determine the size of a slab object |
2398 | */ | 2619 | */ |
2399 | unsigned int kmem_cache_size(struct kmem_cache *s) | 2620 | unsigned int kmem_cache_size(struct kmem_cache *s) |
@@ -2402,28 +2623,20 @@ unsigned int kmem_cache_size(struct kmem_cache *s) | |||
2402 | } | 2623 | } |
2403 | EXPORT_SYMBOL(kmem_cache_size); | 2624 | EXPORT_SYMBOL(kmem_cache_size); |
2404 | 2625 | ||
2405 | const char *kmem_cache_name(struct kmem_cache *s) | ||
2406 | { | ||
2407 | return s->name; | ||
2408 | } | ||
2409 | EXPORT_SYMBOL(kmem_cache_name); | ||
2410 | |||
2411 | static void list_slab_objects(struct kmem_cache *s, struct page *page, | 2626 | static void list_slab_objects(struct kmem_cache *s, struct page *page, |
2412 | const char *text) | 2627 | const char *text) |
2413 | { | 2628 | { |
2414 | #ifdef CONFIG_SLUB_DEBUG | 2629 | #ifdef CONFIG_SLUB_DEBUG |
2415 | void *addr = page_address(page); | 2630 | void *addr = page_address(page); |
2416 | void *p; | 2631 | void *p; |
2417 | long *map = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long), | 2632 | unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * |
2418 | GFP_ATOMIC); | 2633 | sizeof(long), GFP_ATOMIC); |
2419 | |||
2420 | if (!map) | 2634 | if (!map) |
2421 | return; | 2635 | return; |
2422 | slab_err(s, page, "%s", text); | 2636 | slab_err(s, page, "%s", text); |
2423 | slab_lock(page); | 2637 | slab_lock(page); |
2424 | for_each_free_object(p, s, page->freelist) | ||
2425 | set_bit(slab_index(p, s, addr), map); | ||
2426 | 2638 | ||
2639 | get_map(s, page, map); | ||
2427 | for_each_object(p, s, addr, page->objects) { | 2640 | for_each_object(p, s, addr, page->objects) { |
2428 | 2641 | ||
2429 | if (!test_bit(slab_index(p, s, addr), map)) { | 2642 | if (!test_bit(slab_index(p, s, addr), map)) { |
@@ -2448,9 +2661,8 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) | |||
2448 | spin_lock_irqsave(&n->list_lock, flags); | 2661 | spin_lock_irqsave(&n->list_lock, flags); |
2449 | list_for_each_entry_safe(page, h, &n->partial, lru) { | 2662 | list_for_each_entry_safe(page, h, &n->partial, lru) { |
2450 | if (!page->inuse) { | 2663 | if (!page->inuse) { |
2451 | list_del(&page->lru); | 2664 | __remove_partial(n, page); |
2452 | discard_slab(s, page); | 2665 | discard_slab(s, page); |
2453 | n->nr_partial--; | ||
2454 | } else { | 2666 | } else { |
2455 | list_slab_objects(s, page, | 2667 | list_slab_objects(s, page, |
2456 | "Objects remaining on kmem_cache_close()"); | 2668 | "Objects remaining on kmem_cache_close()"); |
@@ -2507,9 +2719,15 @@ EXPORT_SYMBOL(kmem_cache_destroy); | |||
2507 | * Kmalloc subsystem | 2719 | * Kmalloc subsystem |
2508 | *******************************************************************/ | 2720 | *******************************************************************/ |
2509 | 2721 | ||
2510 | struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; | 2722 | struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; |
2511 | EXPORT_SYMBOL(kmalloc_caches); | 2723 | EXPORT_SYMBOL(kmalloc_caches); |
2512 | 2724 | ||
2725 | static struct kmem_cache *kmem_cache; | ||
2726 | |||
2727 | #ifdef CONFIG_ZONE_DMA | ||
2728 | static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; | ||
2729 | #endif | ||
2730 | |||
2513 | static int __init setup_slub_min_order(char *str) | 2731 | static int __init setup_slub_min_order(char *str) |
2514 | { | 2732 | { |
2515 | get_option(&str, &slub_min_order); | 2733 | get_option(&str, &slub_min_order); |
@@ -2546,116 +2764,29 @@ static int __init setup_slub_nomerge(char *str) | |||
2546 | 2764 | ||
2547 | __setup("slub_nomerge", setup_slub_nomerge); | 2765 | __setup("slub_nomerge", setup_slub_nomerge); |
2548 | 2766 | ||
2549 | static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, | 2767 | static struct kmem_cache *__init create_kmalloc_cache(const char *name, |
2550 | const char *name, int size, gfp_t gfp_flags) | 2768 | int size, unsigned int flags) |
2551 | { | 2769 | { |
2552 | unsigned int flags = 0; | 2770 | struct kmem_cache *s; |
2553 | 2771 | ||
2554 | if (gfp_flags & SLUB_DMA) | 2772 | s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); |
2555 | flags = SLAB_CACHE_DMA; | ||
2556 | 2773 | ||
2557 | /* | 2774 | /* |
2558 | * This function is called with IRQs disabled during early-boot on | 2775 | * This function is called with IRQs disabled during early-boot on |
2559 | * single CPU so there's no need to take slub_lock here. | 2776 | * single CPU so there's no need to take slub_lock here. |
2560 | */ | 2777 | */ |
2561 | if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, | 2778 | if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, |
2562 | flags, NULL)) | 2779 | flags, NULL)) |
2563 | goto panic; | 2780 | goto panic; |
2564 | 2781 | ||
2565 | list_add(&s->list, &slab_caches); | 2782 | list_add(&s->list, &slab_caches); |
2566 | |||
2567 | if (sysfs_slab_add(s)) | ||
2568 | goto panic; | ||
2569 | return s; | 2783 | return s; |
2570 | 2784 | ||
2571 | panic: | 2785 | panic: |
2572 | panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); | 2786 | panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); |
2787 | return NULL; | ||
2573 | } | 2788 | } |
2574 | 2789 | ||
2575 | #ifdef CONFIG_ZONE_DMA | ||
2576 | static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT]; | ||
2577 | |||
2578 | static void sysfs_add_func(struct work_struct *w) | ||
2579 | { | ||
2580 | struct kmem_cache *s; | ||
2581 | |||
2582 | down_write(&slub_lock); | ||
2583 | list_for_each_entry(s, &slab_caches, list) { | ||
2584 | if (s->flags & __SYSFS_ADD_DEFERRED) { | ||
2585 | s->flags &= ~__SYSFS_ADD_DEFERRED; | ||
2586 | sysfs_slab_add(s); | ||
2587 | } | ||
2588 | } | ||
2589 | up_write(&slub_lock); | ||
2590 | } | ||
2591 | |||
2592 | static DECLARE_WORK(sysfs_add_work, sysfs_add_func); | ||
2593 | |||
2594 | static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) | ||
2595 | { | ||
2596 | struct kmem_cache *s; | ||
2597 | char *text; | ||
2598 | size_t realsize; | ||
2599 | unsigned long slabflags; | ||
2600 | int i; | ||
2601 | |||
2602 | s = kmalloc_caches_dma[index]; | ||
2603 | if (s) | ||
2604 | return s; | ||
2605 | |||
2606 | /* Dynamically create dma cache */ | ||
2607 | if (flags & __GFP_WAIT) | ||
2608 | down_write(&slub_lock); | ||
2609 | else { | ||
2610 | if (!down_write_trylock(&slub_lock)) | ||
2611 | goto out; | ||
2612 | } | ||
2613 | |||
2614 | if (kmalloc_caches_dma[index]) | ||
2615 | goto unlock_out; | ||
2616 | |||
2617 | realsize = kmalloc_caches[index].objsize; | ||
2618 | text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", | ||
2619 | (unsigned int)realsize); | ||
2620 | |||
2621 | s = NULL; | ||
2622 | for (i = 0; i < KMALLOC_CACHES; i++) | ||
2623 | if (!kmalloc_caches[i].size) | ||
2624 | break; | ||
2625 | |||
2626 | BUG_ON(i >= KMALLOC_CACHES); | ||
2627 | s = kmalloc_caches + i; | ||
2628 | |||
2629 | /* | ||
2630 | * Must defer sysfs creation to a workqueue because we don't know | ||
2631 | * what context we are called from. Before sysfs comes up, we don't | ||
2632 | * need to do anything because our sysfs initcall will start by | ||
2633 | * adding all existing slabs to sysfs. | ||
2634 | */ | ||
2635 | slabflags = SLAB_CACHE_DMA|SLAB_NOTRACK; | ||
2636 | if (slab_state >= SYSFS) | ||
2637 | slabflags |= __SYSFS_ADD_DEFERRED; | ||
2638 | |||
2639 | if (!text || !kmem_cache_open(s, flags, text, | ||
2640 | realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { | ||
2641 | s->size = 0; | ||
2642 | kfree(text); | ||
2643 | goto unlock_out; | ||
2644 | } | ||
2645 | |||
2646 | list_add(&s->list, &slab_caches); | ||
2647 | kmalloc_caches_dma[index] = s; | ||
2648 | |||
2649 | if (slab_state >= SYSFS) | ||
2650 | schedule_work(&sysfs_add_work); | ||
2651 | |||
2652 | unlock_out: | ||
2653 | up_write(&slub_lock); | ||
2654 | out: | ||
2655 | return kmalloc_caches_dma[index]; | ||
2656 | } | ||
2657 | #endif | ||
2658 | |||
2659 | /* | 2790 | /* |
2660 | * Conversion table for small slabs sizes / 8 to the index in the | 2791 | * Conversion table for small slabs sizes / 8 to the index in the |
2661 | * kmalloc array. This is necessary for slabs < 192 since we have non power | 2792 | * kmalloc array. This is necessary for slabs < 192 since we have non power |
@@ -2708,10 +2839,10 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) | |||
2708 | 2839 | ||
2709 | #ifdef CONFIG_ZONE_DMA | 2840 | #ifdef CONFIG_ZONE_DMA |
2710 | if (unlikely((flags & SLUB_DMA))) | 2841 | if (unlikely((flags & SLUB_DMA))) |
2711 | return dma_kmalloc_cache(index, flags); | 2842 | return kmalloc_dma_caches[index]; |
2712 | 2843 | ||
2713 | #endif | 2844 | #endif |
2714 | return &kmalloc_caches[index]; | 2845 | return kmalloc_caches[index]; |
2715 | } | 2846 | } |
2716 | 2847 | ||
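With the DMA caches prebuilt, get_slab() reduces to the size-to-index mapping plus an array lookup. A standalone model of that mapping; the 96/192-byte slots and the power-of-two ladder follow the conventions visible above, but the exact bounds here are illustrative:

    #include <stdio.h>

    /* Map a request size to a kmalloc cache index: slots 1 and 2 hold the odd
     * 96- and 192-byte caches, everything else is the power-of-two cache 2^index. */
    static int kmalloc_index_model(size_t size)
    {
            int index;

            if (size > 64 && size <= 96)
                    return 1;                       /* kmalloc-96 */
            if (size > 128 && size <= 192)
                    return 2;                       /* kmalloc-192 */

            for (index = 3; (1UL << index) < size; index++)
                    ;                               /* smallest power of two >= size */
            return index;
    }

    int main(void)
    {
            printf("%d %d %d\n", kmalloc_index_model(100),  /* 7 -> kmalloc-128 */
                   kmalloc_index_model(192),                /* 2 -> kmalloc-192 */
                   kmalloc_index_model(200));               /* 8 -> kmalloc-256 */
            return 0;
    }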
2717 | void *__kmalloc(size_t size, gfp_t flags) | 2848 | void *__kmalloc(size_t size, gfp_t flags) |
@@ -2735,6 +2866,7 @@ void *__kmalloc(size_t size, gfp_t flags) | |||
2735 | } | 2866 | } |
2736 | EXPORT_SYMBOL(__kmalloc); | 2867 | EXPORT_SYMBOL(__kmalloc); |
2737 | 2868 | ||
2869 | #ifdef CONFIG_NUMA | ||
2738 | static void *kmalloc_large_node(size_t size, gfp_t flags, int node) | 2870 | static void *kmalloc_large_node(size_t size, gfp_t flags, int node) |
2739 | { | 2871 | { |
2740 | struct page *page; | 2872 | struct page *page; |
@@ -2749,7 +2881,6 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) | |||
2749 | return ptr; | 2881 | return ptr; |
2750 | } | 2882 | } |
2751 | 2883 | ||
2752 | #ifdef CONFIG_NUMA | ||
2753 | void *__kmalloc_node(size_t size, gfp_t flags, int node) | 2884 | void *__kmalloc_node(size_t size, gfp_t flags, int node) |
2754 | { | 2885 | { |
2755 | struct kmem_cache *s; | 2886 | struct kmem_cache *s; |
@@ -2782,7 +2913,6 @@ EXPORT_SYMBOL(__kmalloc_node); | |||
2782 | size_t ksize(const void *object) | 2913 | size_t ksize(const void *object) |
2783 | { | 2914 | { |
2784 | struct page *page; | 2915 | struct page *page; |
2785 | struct kmem_cache *s; | ||
2786 | 2916 | ||
2787 | if (unlikely(object == ZERO_SIZE_PTR)) | 2917 | if (unlikely(object == ZERO_SIZE_PTR)) |
2788 | return 0; | 2918 | return 0; |
@@ -2793,28 +2923,8 @@ size_t ksize(const void *object) | |||
2793 | WARN_ON(!PageCompound(page)); | 2923 | WARN_ON(!PageCompound(page)); |
2794 | return PAGE_SIZE << compound_order(page); | 2924 | return PAGE_SIZE << compound_order(page); |
2795 | } | 2925 | } |
2796 | s = page->slab; | ||
2797 | 2926 | ||
2798 | #ifdef CONFIG_SLUB_DEBUG | 2927 | return slab_ksize(page->slab); |
2799 | /* | ||
2800 | * Debugging requires use of the padding between object | ||
2801 | * and whatever may come after it. | ||
2802 | */ | ||
2803 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | ||
2804 | return s->objsize; | ||
2805 | |||
2806 | #endif | ||
2807 | /* | ||
2808 | * If we have the need to store the freelist pointer | ||
2809 | * back there or track user information then we can | ||
2810 | * only use the space before that information. | ||
2811 | */ | ||
2812 | if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) | ||
2813 | return s->inuse; | ||
2814 | /* | ||
2815 | * Else we can use all the padding etc for the allocation | ||
2816 | */ | ||
2817 | return s->size; | ||
2818 | } | 2928 | } |
2819 | EXPORT_SYMBOL(ksize); | 2929 | EXPORT_SYMBOL(ksize); |
2820 | 2930 | ||
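The open-coded logic removed above moves into slab_ksize(); the decision itself is unchanged. A compact model of that decision, with illustrative flag values standing in for the SLAB_* constants:

    #define F_RED_ZONE        0x1   /* stand-ins, not the kernel's flag values */
    #define F_POISON          0x2
    #define F_DESTROY_BY_RCU  0x4
    #define F_STORE_USER      0x8

    struct cache_layout {
            unsigned int flags;
            unsigned int objsize;   /* what the caller asked for */
            unsigned int inuse;     /* offset of free pointer / tracking data */
            unsigned int size;      /* full per-object footprint */
    };

    /* How many bytes of an object the caller may actually use (ksize()). */
    static unsigned int slab_ksize_model(const struct cache_layout *s)
    {
            if (s->flags & (F_RED_ZONE | F_POISON))
                    return s->objsize;      /* debugging owns the padding */
            if (s->flags & (F_DESTROY_BY_RCU | F_STORE_USER))
                    return s->inuse;        /* metadata follows the object */
            return s->size;                 /* otherwise all padding is usable */
    }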
@@ -2889,8 +2999,7 @@ int kmem_cache_shrink(struct kmem_cache *s) | |||
2889 | * may have freed the last object and be | 2999 | * may have freed the last object and be |
2890 | * waiting to release the slab. | 3000 | * waiting to release the slab. |
2891 | */ | 3001 | */ |
2892 | list_del(&page->lru); | 3002 | __remove_partial(n, page); |
2893 | n->nr_partial--; | ||
2894 | slab_unlock(page); | 3003 | slab_unlock(page); |
2895 | discard_slab(s, page); | 3004 | discard_slab(s, page); |
2896 | } else { | 3005 | } else { |
@@ -2914,7 +3023,7 @@ int kmem_cache_shrink(struct kmem_cache *s) | |||
2914 | } | 3023 | } |
2915 | EXPORT_SYMBOL(kmem_cache_shrink); | 3024 | EXPORT_SYMBOL(kmem_cache_shrink); |
2916 | 3025 | ||
2917 | #if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) | 3026 | #if defined(CONFIG_MEMORY_HOTPLUG) |
2918 | static int slab_mem_going_offline_callback(void *arg) | 3027 | static int slab_mem_going_offline_callback(void *arg) |
2919 | { | 3028 | { |
2920 | struct kmem_cache *s; | 3029 | struct kmem_cache *s; |
@@ -2956,7 +3065,7 @@ static void slab_mem_offline_callback(void *arg) | |||
2956 | BUG_ON(slabs_node(s, offline_node)); | 3065 | BUG_ON(slabs_node(s, offline_node)); |
2957 | 3066 | ||
2958 | s->node[offline_node] = NULL; | 3067 | s->node[offline_node] = NULL; |
2959 | kmem_cache_free(kmalloc_caches, n); | 3068 | kmem_cache_free(kmem_cache_node, n); |
2960 | } | 3069 | } |
2961 | } | 3070 | } |
2962 | up_read(&slub_lock); | 3071 | up_read(&slub_lock); |
@@ -2989,7 +3098,7 @@ static int slab_mem_going_online_callback(void *arg) | |||
2989 | * since memory is not yet available from the node that | 3098 | * since memory is not yet available from the node that |
2990 | * is brought up. | 3099 | * is brought up. |
2991 | */ | 3100 | */ |
2992 | n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL); | 3101 | n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL); |
2993 | if (!n) { | 3102 | if (!n) { |
2994 | ret = -ENOMEM; | 3103 | ret = -ENOMEM; |
2995 | goto out; | 3104 | goto out; |
@@ -3035,46 +3144,92 @@ static int slab_memory_callback(struct notifier_block *self, | |||
3035 | * Basic setup of slabs | 3144 | * Basic setup of slabs |
3036 | *******************************************************************/ | 3145 | *******************************************************************/ |
3037 | 3146 | ||
3147 | /* | ||
3148 | * Used for early kmem_cache structures that were allocated using | ||
3149 | * the page allocator | ||
3150 | */ | ||
3151 | |||
3152 | static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s) | ||
3153 | { | ||
3154 | int node; | ||
3155 | |||
3156 | list_add(&s->list, &slab_caches); | ||
3157 | s->refcount = -1; | ||
3158 | |||
3159 | for_each_node_state(node, N_NORMAL_MEMORY) { | ||
3160 | struct kmem_cache_node *n = get_node(s, node); | ||
3161 | struct page *p; | ||
3162 | |||
3163 | if (n) { | ||
3164 | list_for_each_entry(p, &n->partial, lru) | ||
3165 | p->slab = s; | ||
3166 | |||
3167 | #ifdef CONFIG_SLUB_DEBUG | ||
3168 | list_for_each_entry(p, &n->full, lru) | ||
3169 | p->slab = s; | ||
3170 | #endif | ||
3171 | } | ||
3172 | } | ||
3173 | } | ||
3174 | |||
3038 | void __init kmem_cache_init(void) | 3175 | void __init kmem_cache_init(void) |
3039 | { | 3176 | { |
3040 | int i; | 3177 | int i; |
3041 | int caches = 0; | 3178 | int caches = 0; |
3179 | struct kmem_cache *temp_kmem_cache; | ||
3180 | int order; | ||
3181 | struct kmem_cache *temp_kmem_cache_node; | ||
3182 | unsigned long kmalloc_size; | ||
3183 | |||
3184 | kmem_size = offsetof(struct kmem_cache, node) + | ||
3185 | nr_node_ids * sizeof(struct kmem_cache_node *); | ||
3186 | |||
3187 | /* Allocate two kmem_caches from the page allocator */ | ||
3188 | kmalloc_size = ALIGN(kmem_size, cache_line_size()); | ||
3189 | order = get_order(2 * kmalloc_size); | ||
3190 | kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); | ||
3042 | 3191 | ||
3043 | #ifdef CONFIG_NUMA | ||
3044 | /* | 3192 | /* |
3045 | * Must first have the slab cache available for the allocations of the | 3193 | * Must first have the slab cache available for the allocations of the |
3046 | * struct kmem_cache_node's. There is special bootstrap code in | 3194 | * struct kmem_cache_node's. There is special bootstrap code in |
3047 | * kmem_cache_open for slab_state == DOWN. | 3195 | * kmem_cache_open for slab_state == DOWN. |
3048 | */ | 3196 | */ |
3049 | create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", | 3197 | kmem_cache_node = (void *)kmem_cache + kmalloc_size; |
3050 | sizeof(struct kmem_cache_node), GFP_NOWAIT); | 3198 | |
3051 | kmalloc_caches[0].refcount = -1; | 3199 | kmem_cache_open(kmem_cache_node, "kmem_cache_node", |
3052 | caches++; | 3200 | sizeof(struct kmem_cache_node), |
3201 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); | ||
3053 | 3202 | ||
3054 | hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); | 3203 | hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); |
3055 | #endif | ||
3056 | 3204 | ||
3057 | /* Able to allocate the per node structures */ | 3205 | /* Able to allocate the per node structures */ |
3058 | slab_state = PARTIAL; | 3206 | slab_state = PARTIAL; |
3059 | 3207 | ||
3060 | /* Caches that are not of the two-to-the-power-of size */ | 3208 | temp_kmem_cache = kmem_cache; |
3061 | if (KMALLOC_MIN_SIZE <= 32) { | 3209 | kmem_cache_open(kmem_cache, "kmem_cache", kmem_size, |
3062 | create_kmalloc_cache(&kmalloc_caches[1], | 3210 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); |
3063 | "kmalloc-96", 96, GFP_NOWAIT); | 3211 | kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); |
3064 | caches++; | 3212 | memcpy(kmem_cache, temp_kmem_cache, kmem_size); |
3065 | } | ||
3066 | if (KMALLOC_MIN_SIZE <= 64) { | ||
3067 | create_kmalloc_cache(&kmalloc_caches[2], | ||
3068 | "kmalloc-192", 192, GFP_NOWAIT); | ||
3069 | caches++; | ||
3070 | } | ||
3071 | 3213 | ||
3072 | for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { | 3214 | /* |
3073 | create_kmalloc_cache(&kmalloc_caches[i], | 3215 | * Allocate kmem_cache_node properly from the kmem_cache slab. |
3074 | "kmalloc", 1 << i, GFP_NOWAIT); | 3216 | * kmem_cache_node is separately allocated so no need to |
3075 | caches++; | 3217 | * update any list pointers. |
3076 | } | 3218 | */ |
3219 | temp_kmem_cache_node = kmem_cache_node; | ||
3220 | |||
3221 | kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); | ||
3222 | memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size); | ||
3223 | |||
3224 | kmem_cache_bootstrap_fixup(kmem_cache_node); | ||
3225 | |||
3226 | caches++; | ||
3227 | kmem_cache_bootstrap_fixup(kmem_cache); | ||
3228 | caches++; | ||
3229 | /* Free temporary boot structure */ | ||
3230 | free_pages((unsigned long)temp_kmem_cache, order); | ||
3077 | 3231 | ||
3232 | /* Now we can use the kmem_cache to allocate kmalloc slabs */ | ||
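The two page-allocator boot copies of kmem_cache and kmem_cache_node are only a stepping stone: once the allocator can serve objects, each descriptor is cloned into a properly allocated object and the page->slab back-pointers of slabs created so far are repointed (kmem_cache_bootstrap_fixup). A reduced userspace model of that migrate-and-fix-up step; the page list is a plain array and malloc stands in for the slab allocation:

    #include <stdlib.h>
    #include <string.h>

    struct cache_model;
    struct page_model { struct cache_model *slab; };    /* back-pointer, like page->slab */

    struct cache_model {
            struct page_model *pages[8];
            int nr_pages;
    };

    /* Copy the bootstrap descriptor into its final home and repoint every slab
     * page that still refers to the temporary copy. */
    static struct cache_model *bootstrap_fixup_model(struct cache_model *boot)
    {
            struct cache_model *final = malloc(sizeof(*final));

            if (!final)
                    return NULL;
            memcpy(final, boot, sizeof(*final));
            for (int i = 0; i < final->nr_pages; i++)
                    final->pages[i]->slab = final;
            return final;           /* the 'boot' memory can now be given back */
    }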
3078 | 3233 | ||
3079 | /* | 3234 | /* |
3080 | * Patch up the size_index table if we have strange large alignment | 3235 | * Patch up the size_index table if we have strange large alignment |
@@ -3114,26 +3269,60 @@ void __init kmem_cache_init(void) | |||
3114 | size_index[size_index_elem(i)] = 8; | 3269 | size_index[size_index_elem(i)] = 8; |
3115 | } | 3270 | } |
3116 | 3271 | ||
3272 | /* Caches that are not of the two-to-the-power-of size */ | ||
3273 | if (KMALLOC_MIN_SIZE <= 32) { | ||
3274 | kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0); | ||
3275 | caches++; | ||
3276 | } | ||
3277 | |||
3278 | if (KMALLOC_MIN_SIZE <= 64) { | ||
3279 | kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0); | ||
3280 | caches++; | ||
3281 | } | ||
3282 | |||
3283 | for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { | ||
3284 | kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0); | ||
3285 | caches++; | ||
3286 | } | ||
3287 | |||
3117 | slab_state = UP; | 3288 | slab_state = UP; |
3118 | 3289 | ||
3119 | /* Provide the correct kmalloc names now that the caches are up */ | 3290 | /* Provide the correct kmalloc names now that the caches are up */ |
3291 | if (KMALLOC_MIN_SIZE <= 32) { | ||
3292 | kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT); | ||
3293 | BUG_ON(!kmalloc_caches[1]->name); | ||
3294 | } | ||
3295 | |||
3296 | if (KMALLOC_MIN_SIZE <= 64) { | ||
3297 | kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT); | ||
3298 | BUG_ON(!kmalloc_caches[2]->name); | ||
3299 | } | ||
3300 | |||
3120 | for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { | 3301 | for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { |
3121 | char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); | 3302 | char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); |
3122 | 3303 | ||
3123 | BUG_ON(!s); | 3304 | BUG_ON(!s); |
3124 | kmalloc_caches[i].name = s; | 3305 | kmalloc_caches[i]->name = s; |
3125 | } | 3306 | } |
3126 | 3307 | ||
3127 | #ifdef CONFIG_SMP | 3308 | #ifdef CONFIG_SMP |
3128 | register_cpu_notifier(&slab_notifier); | 3309 | register_cpu_notifier(&slab_notifier); |
3129 | #endif | 3310 | #endif |
3130 | #ifdef CONFIG_NUMA | ||
3131 | kmem_size = offsetof(struct kmem_cache, node) + | ||
3132 | nr_node_ids * sizeof(struct kmem_cache_node *); | ||
3133 | #else | ||
3134 | kmem_size = sizeof(struct kmem_cache); | ||
3135 | #endif | ||
3136 | 3311 | ||
3312 | #ifdef CONFIG_ZONE_DMA | ||
3313 | for (i = 0; i < SLUB_PAGE_SHIFT; i++) { | ||
3314 | struct kmem_cache *s = kmalloc_caches[i]; | ||
3315 | |||
3316 | if (s && s->size) { | ||
3317 | char *name = kasprintf(GFP_NOWAIT, | ||
3318 | "dma-kmalloc-%d", s->objsize); | ||
3319 | |||
3320 | BUG_ON(!name); | ||
3321 | kmalloc_dma_caches[i] = create_kmalloc_cache(name, | ||
3322 | s->objsize, SLAB_CACHE_DMA); | ||
3323 | } | ||
3324 | } | ||
3325 | #endif | ||
3137 | printk(KERN_INFO | 3326 | printk(KERN_INFO |
3138 | "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," | 3327 | "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," |
3139 | " CPUs=%d, Nodes=%d\n", | 3328 | " CPUs=%d, Nodes=%d\n", |
@@ -3211,6 +3400,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, | |||
3211 | size_t align, unsigned long flags, void (*ctor)(void *)) | 3400 | size_t align, unsigned long flags, void (*ctor)(void *)) |
3212 | { | 3401 | { |
3213 | struct kmem_cache *s; | 3402 | struct kmem_cache *s; |
3403 | char *n; | ||
3214 | 3404 | ||
3215 | if (WARN_ON(!name)) | 3405 | if (WARN_ON(!name)) |
3216 | return NULL; | 3406 | return NULL; |
@@ -3234,24 +3424,30 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, | |||
3234 | return s; | 3424 | return s; |
3235 | } | 3425 | } |
3236 | 3426 | ||
3427 | n = kstrdup(name, GFP_KERNEL); | ||
3428 | if (!n) | ||
3429 | goto err; | ||
3430 | |||
3237 | s = kmalloc(kmem_size, GFP_KERNEL); | 3431 | s = kmalloc(kmem_size, GFP_KERNEL); |
3238 | if (s) { | 3432 | if (s) { |
3239 | if (kmem_cache_open(s, GFP_KERNEL, name, | 3433 | if (kmem_cache_open(s, n, |
3240 | size, align, flags, ctor)) { | 3434 | size, align, flags, ctor)) { |
3241 | list_add(&s->list, &slab_caches); | 3435 | list_add(&s->list, &slab_caches); |
3242 | if (sysfs_slab_add(s)) { | 3436 | if (sysfs_slab_add(s)) { |
3243 | list_del(&s->list); | 3437 | list_del(&s->list); |
3438 | kfree(n); | ||
3244 | kfree(s); | 3439 | kfree(s); |
3245 | goto err; | 3440 | goto err; |
3246 | } | 3441 | } |
3247 | up_write(&slub_lock); | 3442 | up_write(&slub_lock); |
3248 | return s; | 3443 | return s; |
3249 | } | 3444 | } |
3445 | kfree(n); | ||
3250 | kfree(s); | 3446 | kfree(s); |
3251 | } | 3447 | } |
3448 | err: | ||
3252 | up_write(&slub_lock); | 3449 | up_write(&slub_lock); |
3253 | 3450 | ||
3254 | err: | ||
3255 | if (flags & SLAB_PANIC) | 3451 | if (flags & SLAB_PANIC) |
3256 | panic("Cannot create slabcache %s\n", name); | 3452 | panic("Cannot create slabcache %s\n", name); |
3257 | else | 3453 | else |
@@ -3312,12 +3508,13 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) | |||
3312 | 3508 | ||
3313 | ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller); | 3509 | ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller); |
3314 | 3510 | ||
3315 | /* Honor the call site pointer we recieved. */ | 3511 | /* Honor the call site pointer we received. */ |
3316 | trace_kmalloc(caller, ret, size, s->size, gfpflags); | 3512 | trace_kmalloc(caller, ret, size, s->size, gfpflags); |
3317 | 3513 | ||
3318 | return ret; | 3514 | return ret; |
3319 | } | 3515 | } |
3320 | 3516 | ||
3517 | #ifdef CONFIG_NUMA | ||
3321 | void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, | 3518 | void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, |
3322 | int node, unsigned long caller) | 3519 | int node, unsigned long caller) |
3323 | { | 3520 | { |
@@ -3341,13 +3538,14 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, | |||
3341 | 3538 | ||
3342 | ret = slab_alloc(s, gfpflags, node, caller); | 3539 | ret = slab_alloc(s, gfpflags, node, caller); |
3343 | 3540 | ||
3344 | /* Honor the call site pointer we recieved. */ | 3541 | /* Honor the call site pointer we received. */ |
3345 | trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); | 3542 | trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); |
3346 | 3543 | ||
3347 | return ret; | 3544 | return ret; |
3348 | } | 3545 | } |
3546 | #endif | ||
3349 | 3547 | ||
3350 | #ifdef CONFIG_SLUB_DEBUG | 3548 | #ifdef CONFIG_SYSFS |
3351 | static int count_inuse(struct page *page) | 3549 | static int count_inuse(struct page *page) |
3352 | { | 3550 | { |
3353 | return page->inuse; | 3551 | return page->inuse; |
@@ -3357,7 +3555,9 @@ static int count_total(struct page *page) | |||
3357 | { | 3555 | { |
3358 | return page->objects; | 3556 | return page->objects; |
3359 | } | 3557 | } |
3558 | #endif | ||
3360 | 3559 | ||
3560 | #ifdef CONFIG_SLUB_DEBUG | ||
3361 | static int validate_slab(struct kmem_cache *s, struct page *page, | 3561 | static int validate_slab(struct kmem_cache *s, struct page *page, |
3362 | unsigned long *map) | 3562 | unsigned long *map) |
3363 | { | 3563 | { |
@@ -3371,15 +3571,16 @@ static int validate_slab(struct kmem_cache *s, struct page *page, | |||
3371 | /* Now we know that a valid freelist exists */ | 3571 | /* Now we know that a valid freelist exists */ |
3372 | bitmap_zero(map, page->objects); | 3572 | bitmap_zero(map, page->objects); |
3373 | 3573 | ||
3374 | for_each_free_object(p, s, page->freelist) { | 3574 | get_map(s, page, map); |
3375 | set_bit(slab_index(p, s, addr), map); | 3575 | for_each_object(p, s, addr, page->objects) { |
3376 | if (!check_object(s, page, p, 0)) | 3576 | if (test_bit(slab_index(p, s, addr), map)) |
3377 | return 0; | 3577 | if (!check_object(s, page, p, SLUB_RED_INACTIVE)) |
3578 | return 0; | ||
3378 | } | 3579 | } |
3379 | 3580 | ||
3380 | for_each_object(p, s, addr, page->objects) | 3581 | for_each_object(p, s, addr, page->objects) |
3381 | if (!test_bit(slab_index(p, s, addr), map)) | 3582 | if (!test_bit(slab_index(p, s, addr), map)) |
3382 | if (!check_object(s, page, p, 1)) | 3583 | if (!check_object(s, page, p, SLUB_RED_ACTIVE)) |
3383 | return 0; | 3584 | return 0; |
3384 | return 1; | 3585 | return 1; |
3385 | } | 3586 | } |
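Both the validation and the leak-listing code now build the free-object bitmap through the shared get_map() helper instead of open-coding the freelist walk. A standalone model of the idea, using object indices instead of freelist pointers:

    #include <stdbool.h>
    #include <string.h>

    #define MAX_OBJECTS 64

    /* Mark every object reachable from the freelist; anything left unmarked is
     * treated as allocated, the same split the validation loops above rely on. */
    static void get_map_model(const int *freelist, int free_count,
                              bool map[MAX_OBJECTS])
    {
            memset(map, 0, MAX_OBJECTS * sizeof(bool));
            for (int i = 0; i < free_count; i++)
                    map[freelist[i]] = true;        /* this object index is free */
    }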
@@ -3448,65 +3649,6 @@ static long validate_slab_cache(struct kmem_cache *s) | |||
3448 | kfree(map); | 3649 | kfree(map); |
3449 | return count; | 3650 | return count; |
3450 | } | 3651 | } |
3451 | |||
3452 | #ifdef SLUB_RESILIENCY_TEST | ||
3453 | static void resiliency_test(void) | ||
3454 | { | ||
3455 | u8 *p; | ||
3456 | |||
3457 | printk(KERN_ERR "SLUB resiliency testing\n"); | ||
3458 | printk(KERN_ERR "-----------------------\n"); | ||
3459 | printk(KERN_ERR "A. Corruption after allocation\n"); | ||
3460 | |||
3461 | p = kzalloc(16, GFP_KERNEL); | ||
3462 | p[16] = 0x12; | ||
3463 | printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" | ||
3464 | " 0x12->0x%p\n\n", p + 16); | ||
3465 | |||
3466 | validate_slab_cache(kmalloc_caches + 4); | ||
3467 | |||
3468 | /* Hmmm... The next two are dangerous */ | ||
3469 | p = kzalloc(32, GFP_KERNEL); | ||
3470 | p[32 + sizeof(void *)] = 0x34; | ||
3471 | printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" | ||
3472 | " 0x34 -> -0x%p\n", p); | ||
3473 | printk(KERN_ERR | ||
3474 | "If allocated object is overwritten then not detectable\n\n"); | ||
3475 | |||
3476 | validate_slab_cache(kmalloc_caches + 5); | ||
3477 | p = kzalloc(64, GFP_KERNEL); | ||
3478 | p += 64 + (get_cycles() & 0xff) * sizeof(void *); | ||
3479 | *p = 0x56; | ||
3480 | printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", | ||
3481 | p); | ||
3482 | printk(KERN_ERR | ||
3483 | "If allocated object is overwritten then not detectable\n\n"); | ||
3484 | validate_slab_cache(kmalloc_caches + 6); | ||
3485 | |||
3486 | printk(KERN_ERR "\nB. Corruption after free\n"); | ||
3487 | p = kzalloc(128, GFP_KERNEL); | ||
3488 | kfree(p); | ||
3489 | *p = 0x78; | ||
3490 | printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); | ||
3491 | validate_slab_cache(kmalloc_caches + 7); | ||
3492 | |||
3493 | p = kzalloc(256, GFP_KERNEL); | ||
3494 | kfree(p); | ||
3495 | p[50] = 0x9a; | ||
3496 | printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", | ||
3497 | p); | ||
3498 | validate_slab_cache(kmalloc_caches + 8); | ||
3499 | |||
3500 | p = kzalloc(512, GFP_KERNEL); | ||
3501 | kfree(p); | ||
3502 | p[512] = 0xab; | ||
3503 | printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); | ||
3504 | validate_slab_cache(kmalloc_caches + 9); | ||
3505 | } | ||
3506 | #else | ||
3507 | static void resiliency_test(void) {}; | ||
3508 | #endif | ||
3509 | |||
3510 | /* | 3652 | /* |
3511 | * Generate lists of code addresses where slabcache objects are allocated | 3653 | * Generate lists of code addresses where slabcache objects are allocated |
3512 | * and freed. | 3654 | * and freed. |
@@ -3635,14 +3777,13 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, | |||
3635 | 3777 | ||
3636 | static void process_slab(struct loc_track *t, struct kmem_cache *s, | 3778 | static void process_slab(struct loc_track *t, struct kmem_cache *s, |
3637 | struct page *page, enum track_item alloc, | 3779 | struct page *page, enum track_item alloc, |
3638 | long *map) | 3780 | unsigned long *map) |
3639 | { | 3781 | { |
3640 | void *addr = page_address(page); | 3782 | void *addr = page_address(page); |
3641 | void *p; | 3783 | void *p; |
3642 | 3784 | ||
3643 | bitmap_zero(map, page->objects); | 3785 | bitmap_zero(map, page->objects); |
3644 | for_each_free_object(p, s, page->freelist) | 3786 | get_map(s, page, map); |
3645 | set_bit(slab_index(p, s, addr), map); | ||
3646 | 3787 | ||
3647 | for_each_object(p, s, addr, page->objects) | 3788 | for_each_object(p, s, addr, page->objects) |
3648 | if (!test_bit(slab_index(p, s, addr), map)) | 3789 | if (!test_bit(slab_index(p, s, addr), map)) |
@@ -3691,7 +3832,7 @@ static int list_locations(struct kmem_cache *s, char *buf, | |||
3691 | len += sprintf(buf + len, "%7ld ", l->count); | 3832 | len += sprintf(buf + len, "%7ld ", l->count); |
3692 | 3833 | ||
3693 | if (l->addr) | 3834 | if (l->addr) |
3694 | len += sprint_symbol(buf + len, (unsigned long)l->addr); | 3835 | len += sprintf(buf + len, "%pS", (void *)l->addr); |
3695 | else | 3836 | else |
3696 | len += sprintf(buf + len, "<not-available>"); | 3837 | len += sprintf(buf + len, "<not-available>"); |
3697 | 3838 | ||
@@ -3735,7 +3876,71 @@ static int list_locations(struct kmem_cache *s, char *buf, | |||
3735 | len += sprintf(buf, "No data\n"); | 3876 | len += sprintf(buf, "No data\n"); |
3736 | return len; | 3877 | return len; |
3737 | } | 3878 | } |
3879 | #endif | ||
3880 | |||
3881 | #ifdef SLUB_RESILIENCY_TEST | ||
3882 | static void resiliency_test(void) | ||
3883 | { | ||
3884 | u8 *p; | ||
3885 | |||
3886 | BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10); | ||
3887 | |||
3888 | printk(KERN_ERR "SLUB resiliency testing\n"); | ||
3889 | printk(KERN_ERR "-----------------------\n"); | ||
3890 | printk(KERN_ERR "A. Corruption after allocation\n"); | ||
3891 | |||
3892 | p = kzalloc(16, GFP_KERNEL); | ||
3893 | p[16] = 0x12; | ||
3894 | printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" | ||
3895 | " 0x12->0x%p\n\n", p + 16); | ||
3896 | |||
3897 | validate_slab_cache(kmalloc_caches[4]); | ||
3898 | |||
3899 | /* Hmmm... The next two are dangerous */ | ||
3900 | p = kzalloc(32, GFP_KERNEL); | ||
3901 | p[32 + sizeof(void *)] = 0x34; | ||
3902 | printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" | ||
3903 | " 0x34 -> -0x%p\n", p); | ||
3904 | printk(KERN_ERR | ||
3905 | "If allocated object is overwritten then not detectable\n\n"); | ||
3906 | |||
3907 | validate_slab_cache(kmalloc_caches[5]); | ||
3908 | p = kzalloc(64, GFP_KERNEL); | ||
3909 | p += 64 + (get_cycles() & 0xff) * sizeof(void *); | ||
3910 | *p = 0x56; | ||
3911 | printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", | ||
3912 | p); | ||
3913 | printk(KERN_ERR | ||
3914 | "If allocated object is overwritten then not detectable\n\n"); | ||
3915 | validate_slab_cache(kmalloc_caches[6]); | ||
3916 | |||
3917 | printk(KERN_ERR "\nB. Corruption after free\n"); | ||
3918 | p = kzalloc(128, GFP_KERNEL); | ||
3919 | kfree(p); | ||
3920 | *p = 0x78; | ||
3921 | printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); | ||
3922 | validate_slab_cache(kmalloc_caches[7]); | ||
3923 | |||
3924 | p = kzalloc(256, GFP_KERNEL); | ||
3925 | kfree(p); | ||
3926 | p[50] = 0x9a; | ||
3927 | printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", | ||
3928 | p); | ||
3929 | validate_slab_cache(kmalloc_caches[8]); | ||
3930 | |||
3931 | p = kzalloc(512, GFP_KERNEL); | ||
3932 | kfree(p); | ||
3933 | p[512] = 0xab; | ||
3934 | printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); | ||
3935 | validate_slab_cache(kmalloc_caches[9]); | ||
3936 | } | ||
3937 | #else | ||
3938 | #ifdef CONFIG_SYSFS | ||
3939 | static void resiliency_test(void) {}; | ||
3940 | #endif | ||
3941 | #endif | ||
3738 | 3942 | ||
3943 | #ifdef CONFIG_SYSFS | ||
3739 | enum slab_stat_type { | 3944 | enum slab_stat_type { |
3740 | SL_ALL, /* All slabs */ | 3945 | SL_ALL, /* All slabs */ |
3741 | SL_PARTIAL, /* Only partially allocated slabs */ | 3946 | SL_PARTIAL, /* Only partially allocated slabs */ |
@@ -3788,6 +3993,8 @@ static ssize_t show_slab_objects(struct kmem_cache *s, | |||
3788 | } | 3993 | } |
3789 | } | 3994 | } |
3790 | 3995 | ||
3996 | lock_memory_hotplug(); | ||
3997 | #ifdef CONFIG_SLUB_DEBUG | ||
3791 | if (flags & SO_ALL) { | 3998 | if (flags & SO_ALL) { |
3792 | for_each_node_state(node, N_NORMAL_MEMORY) { | 3999 | for_each_node_state(node, N_NORMAL_MEMORY) { |
3793 | struct kmem_cache_node *n = get_node(s, node); | 4000 | struct kmem_cache_node *n = get_node(s, node); |
@@ -3804,7 +4011,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s, | |||
3804 | nodes[node] += x; | 4011 | nodes[node] += x; |
3805 | } | 4012 | } |
3806 | 4013 | ||
3807 | } else if (flags & SO_PARTIAL) { | 4014 | } else |
4015 | #endif | ||
4016 | if (flags & SO_PARTIAL) { | ||
3808 | for_each_node_state(node, N_NORMAL_MEMORY) { | 4017 | for_each_node_state(node, N_NORMAL_MEMORY) { |
3809 | struct kmem_cache_node *n = get_node(s, node); | 4018 | struct kmem_cache_node *n = get_node(s, node); |
3810 | 4019 | ||
@@ -3825,10 +4034,12 @@ static ssize_t show_slab_objects(struct kmem_cache *s, | |||
3825 | x += sprintf(buf + x, " N%d=%lu", | 4034 | x += sprintf(buf + x, " N%d=%lu", |
3826 | node, nodes[node]); | 4035 | node, nodes[node]); |
3827 | #endif | 4036 | #endif |
4037 | unlock_memory_hotplug(); | ||
3828 | kfree(nodes); | 4038 | kfree(nodes); |
3829 | return x + sprintf(buf + x, "\n"); | 4039 | return x + sprintf(buf + x, "\n"); |
3830 | } | 4040 | } |
3831 | 4041 | ||
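show_slab_objects() now brackets its per-node walk with lock/unlock_memory_hotplug() so a node's kmem_cache_node cannot disappear while it is being counted. The bracket pattern, modelled here with a plain pthread mutex standing in for the kernel primitive:

    #include <pthread.h>

    static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Walk per-node data with node removal excluded for the duration. */
    static unsigned long count_all_nodes(unsigned long (*count_node)(int), int nr_nodes)
    {
            unsigned long total = 0;

            pthread_mutex_lock(&hotplug_lock);      /* lock_memory_hotplug() */
            for (int node = 0; node < nr_nodes; node++)
                    total += count_node(node);
            pthread_mutex_unlock(&hotplug_lock);    /* unlock_memory_hotplug() */
            return total;
    }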
4042 | #ifdef CONFIG_SLUB_DEBUG | ||
3832 | static int any_slab_objects(struct kmem_cache *s) | 4043 | static int any_slab_objects(struct kmem_cache *s) |
3833 | { | 4044 | { |
3834 | int node; | 4045 | int node; |
@@ -3844,6 +4055,7 @@ static int any_slab_objects(struct kmem_cache *s) | |||
3844 | } | 4055 | } |
3845 | return 0; | 4056 | return 0; |
3846 | } | 4057 | } |
4058 | #endif | ||
3847 | 4059 | ||
3848 | #define to_slab_attr(n) container_of(n, struct slab_attribute, attr) | 4060 | #define to_slab_attr(n) container_of(n, struct slab_attribute, attr) |
3849 | #define to_slab(n) container_of(n, struct kmem_cache, kobj); | 4061 | #define to_slab(n) container_of(n, struct kmem_cache, kobj); |
@@ -3930,12 +4142,9 @@ SLAB_ATTR(min_partial); | |||
3930 | 4142 | ||
3931 | static ssize_t ctor_show(struct kmem_cache *s, char *buf) | 4143 | static ssize_t ctor_show(struct kmem_cache *s, char *buf) |
3932 | { | 4144 | { |
3933 | if (s->ctor) { | 4145 | if (!s->ctor) |
3934 | int n = sprint_symbol(buf, (unsigned long)s->ctor); | 4146 | return 0; |
3935 | 4147 | return sprintf(buf, "%pS\n", s->ctor); | |
3936 | return n + sprintf(buf + n, "\n"); | ||
3937 | } | ||
3938 | return 0; | ||
3939 | } | 4148 | } |
3940 | SLAB_ATTR_RO(ctor); | 4149 | SLAB_ATTR_RO(ctor); |
3941 | 4150 | ||
@@ -3945,12 +4154,6 @@ static ssize_t aliases_show(struct kmem_cache *s, char *buf) | |||
3945 | } | 4154 | } |
3946 | SLAB_ATTR_RO(aliases); | 4155 | SLAB_ATTR_RO(aliases); |
3947 | 4156 | ||
3948 | static ssize_t slabs_show(struct kmem_cache *s, char *buf) | ||
3949 | { | ||
3950 | return show_slab_objects(s, buf, SO_ALL); | ||
3951 | } | ||
3952 | SLAB_ATTR_RO(slabs); | ||
3953 | |||
3954 | static ssize_t partial_show(struct kmem_cache *s, char *buf) | 4157 | static ssize_t partial_show(struct kmem_cache *s, char *buf) |
3955 | { | 4158 | { |
3956 | return show_slab_objects(s, buf, SO_PARTIAL); | 4159 | return show_slab_objects(s, buf, SO_PARTIAL); |
@@ -3975,93 +4178,89 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf) | |||
3975 | } | 4178 | } |
3976 | SLAB_ATTR_RO(objects_partial); | 4179 | SLAB_ATTR_RO(objects_partial); |
3977 | 4180 | ||
3978 | static ssize_t total_objects_show(struct kmem_cache *s, char *buf) | 4181 | static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) |
3979 | { | ||
3980 | return show_slab_objects(s, buf, SO_ALL|SO_TOTAL); | ||
3981 | } | ||
3982 | SLAB_ATTR_RO(total_objects); | ||
3983 | |||
3984 | static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) | ||
3985 | { | 4182 | { |
3986 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); | 4183 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); |
3987 | } | 4184 | } |
3988 | 4185 | ||
3989 | static ssize_t sanity_checks_store(struct kmem_cache *s, | 4186 | static ssize_t reclaim_account_store(struct kmem_cache *s, |
3990 | const char *buf, size_t length) | 4187 | const char *buf, size_t length) |
3991 | { | 4188 | { |
3992 | s->flags &= ~SLAB_DEBUG_FREE; | 4189 | s->flags &= ~SLAB_RECLAIM_ACCOUNT; |
3993 | if (buf[0] == '1') | 4190 | if (buf[0] == '1') |
3994 | s->flags |= SLAB_DEBUG_FREE; | 4191 | s->flags |= SLAB_RECLAIM_ACCOUNT; |
3995 | return length; | 4192 | return length; |
3996 | } | 4193 | } |
3997 | SLAB_ATTR(sanity_checks); | 4194 | SLAB_ATTR(reclaim_account); |
3998 | 4195 | ||
3999 | static ssize_t trace_show(struct kmem_cache *s, char *buf) | 4196 | static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf) |
4000 | { | 4197 | { |
4001 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE)); | 4198 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN)); |
4002 | } | 4199 | } |
4200 | SLAB_ATTR_RO(hwcache_align); | ||
4003 | 4201 | ||
4004 | static ssize_t trace_store(struct kmem_cache *s, const char *buf, | 4202 | #ifdef CONFIG_ZONE_DMA |
4005 | size_t length) | 4203 | static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) |
4006 | { | 4204 | { |
4007 | s->flags &= ~SLAB_TRACE; | 4205 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA)); |
4008 | if (buf[0] == '1') | ||
4009 | s->flags |= SLAB_TRACE; | ||
4010 | return length; | ||
4011 | } | 4206 | } |
4012 | SLAB_ATTR(trace); | 4207 | SLAB_ATTR_RO(cache_dma); |
4208 | #endif | ||
4013 | 4209 | ||
4014 | #ifdef CONFIG_FAILSLAB | 4210 | static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) |
4015 | static ssize_t failslab_show(struct kmem_cache *s, char *buf) | ||
4016 | { | 4211 | { |
4017 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); | 4212 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU)); |
4018 | } | 4213 | } |
4214 | SLAB_ATTR_RO(destroy_by_rcu); | ||
4019 | 4215 | ||
4020 | static ssize_t failslab_store(struct kmem_cache *s, const char *buf, | 4216 | static ssize_t reserved_show(struct kmem_cache *s, char *buf) |
4021 | size_t length) | ||
4022 | { | 4217 | { |
4023 | s->flags &= ~SLAB_FAILSLAB; | 4218 | return sprintf(buf, "%d\n", s->reserved); |
4024 | if (buf[0] == '1') | ||
4025 | s->flags |= SLAB_FAILSLAB; | ||
4026 | return length; | ||
4027 | } | 4219 | } |
4028 | SLAB_ATTR(failslab); | 4220 | SLAB_ATTR_RO(reserved); |
4029 | #endif | ||
4030 | 4221 | ||
4031 | static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) | 4222 | #ifdef CONFIG_SLUB_DEBUG |
4223 | static ssize_t slabs_show(struct kmem_cache *s, char *buf) | ||
4032 | { | 4224 | { |
4033 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); | 4225 | return show_slab_objects(s, buf, SO_ALL); |
4034 | } | 4226 | } |
4227 | SLAB_ATTR_RO(slabs); | ||
4035 | 4228 | ||
4036 | static ssize_t reclaim_account_store(struct kmem_cache *s, | 4229 | static ssize_t total_objects_show(struct kmem_cache *s, char *buf) |
4037 | const char *buf, size_t length) | ||
4038 | { | 4230 | { |
4039 | s->flags &= ~SLAB_RECLAIM_ACCOUNT; | 4231 | return show_slab_objects(s, buf, SO_ALL|SO_TOTAL); |
4040 | if (buf[0] == '1') | ||
4041 | s->flags |= SLAB_RECLAIM_ACCOUNT; | ||
4042 | return length; | ||
4043 | } | 4232 | } |
4044 | SLAB_ATTR(reclaim_account); | 4233 | SLAB_ATTR_RO(total_objects); |
4045 | 4234 | ||
4046 | static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf) | 4235 | static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) |
4047 | { | 4236 | { |
4048 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN)); | 4237 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); |
4049 | } | 4238 | } |
4050 | SLAB_ATTR_RO(hwcache_align); | ||
4051 | 4239 | ||
4052 | #ifdef CONFIG_ZONE_DMA | 4240 | static ssize_t sanity_checks_store(struct kmem_cache *s, |
4053 | static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) | 4241 | const char *buf, size_t length) |
4054 | { | 4242 | { |
4055 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA)); | 4243 | s->flags &= ~SLAB_DEBUG_FREE; |
4244 | if (buf[0] == '1') | ||
4245 | s->flags |= SLAB_DEBUG_FREE; | ||
4246 | return length; | ||
4056 | } | 4247 | } |
4057 | SLAB_ATTR_RO(cache_dma); | 4248 | SLAB_ATTR(sanity_checks); |
4058 | #endif | ||
4059 | 4249 | ||
4060 | static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) | 4250 | static ssize_t trace_show(struct kmem_cache *s, char *buf) |
4061 | { | 4251 | { |
4062 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU)); | 4252 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE)); |
4063 | } | 4253 | } |
4064 | SLAB_ATTR_RO(destroy_by_rcu); | 4254 | |
4255 | static ssize_t trace_store(struct kmem_cache *s, const char *buf, | ||
4256 | size_t length) | ||
4257 | { | ||
4258 | s->flags &= ~SLAB_TRACE; | ||
4259 | if (buf[0] == '1') | ||
4260 | s->flags |= SLAB_TRACE; | ||
4261 | return length; | ||
4262 | } | ||
4263 | SLAB_ATTR(trace); | ||
4065 | 4264 | ||
4066 | static ssize_t red_zone_show(struct kmem_cache *s, char *buf) | 4265 | static ssize_t red_zone_show(struct kmem_cache *s, char *buf) |
4067 | { | 4266 | { |
@@ -4139,6 +4338,40 @@ static ssize_t validate_store(struct kmem_cache *s, | |||
4139 | } | 4338 | } |
4140 | SLAB_ATTR(validate); | 4339 | SLAB_ATTR(validate); |
4141 | 4340 | ||
4341 | static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) | ||
4342 | { | ||
4343 | if (!(s->flags & SLAB_STORE_USER)) | ||
4344 | return -ENOSYS; | ||
4345 | return list_locations(s, buf, TRACK_ALLOC); | ||
4346 | } | ||
4347 | SLAB_ATTR_RO(alloc_calls); | ||
4348 | |||
4349 | static ssize_t free_calls_show(struct kmem_cache *s, char *buf) | ||
4350 | { | ||
4351 | if (!(s->flags & SLAB_STORE_USER)) | ||
4352 | return -ENOSYS; | ||
4353 | return list_locations(s, buf, TRACK_FREE); | ||
4354 | } | ||
4355 | SLAB_ATTR_RO(free_calls); | ||
4356 | #endif /* CONFIG_SLUB_DEBUG */ | ||
4357 | |||
4358 | #ifdef CONFIG_FAILSLAB | ||
4359 | static ssize_t failslab_show(struct kmem_cache *s, char *buf) | ||
4360 | { | ||
4361 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); | ||
4362 | } | ||
4363 | |||
4364 | static ssize_t failslab_store(struct kmem_cache *s, const char *buf, | ||
4365 | size_t length) | ||
4366 | { | ||
4367 | s->flags &= ~SLAB_FAILSLAB; | ||
4368 | if (buf[0] == '1') | ||
4369 | s->flags |= SLAB_FAILSLAB; | ||
4370 | return length; | ||
4371 | } | ||
4372 | SLAB_ATTR(failslab); | ||
4373 | #endif | ||
4374 | |||
4142 | static ssize_t shrink_show(struct kmem_cache *s, char *buf) | 4375 | static ssize_t shrink_show(struct kmem_cache *s, char *buf) |
4143 | { | 4376 | { |
4144 | return 0; | 4377 | return 0; |
@@ -4158,22 +4391,6 @@ static ssize_t shrink_store(struct kmem_cache *s, | |||
4158 | } | 4391 | } |
4159 | SLAB_ATTR(shrink); | 4392 | SLAB_ATTR(shrink); |
4160 | 4393 | ||
4161 | static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) | ||
4162 | { | ||
4163 | if (!(s->flags & SLAB_STORE_USER)) | ||
4164 | return -ENOSYS; | ||
4165 | return list_locations(s, buf, TRACK_ALLOC); | ||
4166 | } | ||
4167 | SLAB_ATTR_RO(alloc_calls); | ||
4168 | |||
4169 | static ssize_t free_calls_show(struct kmem_cache *s, char *buf) | ||
4170 | { | ||
4171 | if (!(s->flags & SLAB_STORE_USER)) | ||
4172 | return -ENOSYS; | ||
4173 | return list_locations(s, buf, TRACK_FREE); | ||
4174 | } | ||
4175 | SLAB_ATTR_RO(free_calls); | ||
4176 | |||
4177 | #ifdef CONFIG_NUMA | 4394 | #ifdef CONFIG_NUMA |
4178 | static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) | 4395 | static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) |
4179 | { | 4396 | { |
@@ -4279,25 +4496,28 @@ static struct attribute *slab_attrs[] = { | |||
4279 | &min_partial_attr.attr, | 4496 | &min_partial_attr.attr, |
4280 | &objects_attr.attr, | 4497 | &objects_attr.attr, |
4281 | &objects_partial_attr.attr, | 4498 | &objects_partial_attr.attr, |
4282 | &total_objects_attr.attr, | ||
4283 | &slabs_attr.attr, | ||
4284 | &partial_attr.attr, | 4499 | &partial_attr.attr, |
4285 | &cpu_slabs_attr.attr, | 4500 | &cpu_slabs_attr.attr, |
4286 | &ctor_attr.attr, | 4501 | &ctor_attr.attr, |
4287 | &aliases_attr.attr, | 4502 | &aliases_attr.attr, |
4288 | &align_attr.attr, | 4503 | &align_attr.attr, |
4289 | &sanity_checks_attr.attr, | ||
4290 | &trace_attr.attr, | ||
4291 | &hwcache_align_attr.attr, | 4504 | &hwcache_align_attr.attr, |
4292 | &reclaim_account_attr.attr, | 4505 | &reclaim_account_attr.attr, |
4293 | &destroy_by_rcu_attr.attr, | 4506 | &destroy_by_rcu_attr.attr, |
4507 | &shrink_attr.attr, | ||
4508 | &reserved_attr.attr, | ||
4509 | #ifdef CONFIG_SLUB_DEBUG | ||
4510 | &total_objects_attr.attr, | ||
4511 | &slabs_attr.attr, | ||
4512 | &sanity_checks_attr.attr, | ||
4513 | &trace_attr.attr, | ||
4294 | &red_zone_attr.attr, | 4514 | &red_zone_attr.attr, |
4295 | &poison_attr.attr, | 4515 | &poison_attr.attr, |
4296 | &store_user_attr.attr, | 4516 | &store_user_attr.attr, |
4297 | &validate_attr.attr, | 4517 | &validate_attr.attr, |
4298 | &shrink_attr.attr, | ||
4299 | &alloc_calls_attr.attr, | 4518 | &alloc_calls_attr.attr, |
4300 | &free_calls_attr.attr, | 4519 | &free_calls_attr.attr, |
4520 | #endif | ||
4301 | #ifdef CONFIG_ZONE_DMA | 4521 | #ifdef CONFIG_ZONE_DMA |
4302 | &cache_dma_attr.attr, | 4522 | &cache_dma_attr.attr, |
4303 | #endif | 4523 | #endif |
@@ -4377,6 +4597,7 @@ static void kmem_cache_release(struct kobject *kobj) | |||
4377 | { | 4597 | { |
4378 | struct kmem_cache *s = to_slab(kobj); | 4598 | struct kmem_cache *s = to_slab(kobj); |
4379 | 4599 | ||
4600 | kfree(s->name); | ||
4380 | kfree(s); | 4601 | kfree(s); |
4381 | } | 4602 | } |
4382 | 4603 | ||
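kmem_cache_create() now hands the cache a kstrdup()'d copy of the caller's name, so both teardown paths (the sysfs release above and the !SYSFS stub) free the name together with the descriptor. A minimal model of that ownership rule, with malloc/strdup/free standing in for the kernel allocators:

    #include <stdlib.h>
    #include <string.h>

    struct cache_desc { char *name; };

    /* The cache owns its own copy of the name... */
    static struct cache_desc *cache_create_model(const char *name)
    {
            struct cache_desc *s = malloc(sizeof(*s));

            if (!s)
                    return NULL;
            s->name = strdup(name);         /* like kstrdup(name, GFP_KERNEL) */
            if (!s->name) {
                    free(s);
                    return NULL;
            }
            return s;
    }

    /* ...so releasing it frees both, mirroring kmem_cache_release(). */
    static void cache_release_model(struct cache_desc *s)
    {
            free(s->name);
            free(s);
    }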
@@ -4579,7 +4800,7 @@ static int __init slab_sysfs_init(void) | |||
4579 | } | 4800 | } |
4580 | 4801 | ||
4581 | __initcall(slab_sysfs_init); | 4802 | __initcall(slab_sysfs_init); |
4582 | #endif | 4803 | #endif /* CONFIG_SYSFS */ |
4583 | 4804 | ||
4584 | /* | 4805 | /* |
4585 | * The /proc/slabinfo ABI | 4806 | * The /proc/slabinfo ABI |