aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/mempolicy.h2
-rw-r--r--include/linux/mm_types.h11
-rw-r--r--include/linux/slab.h24
-rw-r--r--include/linux/slab_def.h12
-rw-r--r--include/linux/slub_def.h3
-rw-r--r--mm/Makefile3
-rw-r--r--mm/mempolicy.c8
-rw-r--r--mm/slab.c406
-rw-r--r--mm/slab.h33
-rw-r--r--mm/slab_common.c120
-rw-r--r--mm/slob.c152
-rw-r--r--mm/slub.c436
-rw-r--r--tools/vm/slabinfo.c14
13 files changed, 608 insertions, 616 deletions
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 4aa42732e47f..95b738c7abff 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -215,7 +215,7 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
215extern bool init_nodemask_of_mempolicy(nodemask_t *mask); 215extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
216extern bool mempolicy_nodemask_intersects(struct task_struct *tsk, 216extern bool mempolicy_nodemask_intersects(struct task_struct *tsk,
217 const nodemask_t *mask); 217 const nodemask_t *mask);
218extern unsigned slab_node(struct mempolicy *policy); 218extern unsigned slab_node(void);
219 219
220extern enum zone_type policy_zone; 220extern enum zone_type policy_zone;
221 221
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 704a626d94a0..074eb98fe15d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -53,7 +53,7 @@ struct page {
53 struct { 53 struct {
54 union { 54 union {
55 pgoff_t index; /* Our offset within mapping. */ 55 pgoff_t index; /* Our offset within mapping. */
56 void *freelist; /* slub first free object */ 56 void *freelist; /* slub/slob first free object */
57 }; 57 };
58 58
59 union { 59 union {
@@ -91,11 +91,12 @@ struct page {
91 */ 91 */
92 atomic_t _mapcount; 92 atomic_t _mapcount;
93 93
94 struct { 94 struct { /* SLUB */
95 unsigned inuse:16; 95 unsigned inuse:16;
96 unsigned objects:15; 96 unsigned objects:15;
97 unsigned frozen:1; 97 unsigned frozen:1;
98 }; 98 };
99 int units; /* SLOB */
99 }; 100 };
100 atomic_t _count; /* Usage count, see below. */ 101 atomic_t _count; /* Usage count, see below. */
101 }; 102 };
@@ -117,6 +118,12 @@ struct page {
117 short int pobjects; 118 short int pobjects;
118#endif 119#endif
119 }; 120 };
121
122 struct list_head list; /* slobs list of pages */
123 struct { /* slab fields */
124 struct kmem_cache *slab_cache;
125 struct slab *slab_page;
126 };
120 }; 127 };
121 128
122 /* Remainder is not double word aligned */ 129 /* Remainder is not double word aligned */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 67d5d94b783a..0dd2dfa7beca 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -93,6 +93,30 @@
93 (unsigned long)ZERO_SIZE_PTR) 93 (unsigned long)ZERO_SIZE_PTR)
94 94
95/* 95/*
96 * Common fields provided in kmem_cache by all slab allocators
97 * This struct is either used directly by the allocator (SLOB)
98 * or the allocator must include definitions for all fields
99 * provided in kmem_cache_common in their definition of kmem_cache.
100 *
101 * Once we can do anonymous structs (C11 standard) we could put a
102 * anonymous struct definition in these allocators so that the
103 * separate allocations in the kmem_cache structure of SLAB and
104 * SLUB is no longer needed.
105 */
106#ifdef CONFIG_SLOB
107struct kmem_cache {
108 unsigned int object_size;/* The original size of the object */
109 unsigned int size; /* The aligned/padded/added on size */
110 unsigned int align; /* Alignment as calculated */
111 unsigned long flags; /* Active flags on the slab */
112 const char *name; /* Slab name for sysfs */
113 int refcount; /* Use counter */
114 void (*ctor)(void *); /* Called on object slot creation */
115 struct list_head list; /* List of all slab caches on the system */
116};
117#endif
118
119/*
96 * struct kmem_cache related prototypes 120 * struct kmem_cache related prototypes
97 */ 121 */
98void __init kmem_cache_init(void); 122void __init kmem_cache_init(void);
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index fbd1117fdfde..0c634fa376c9 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -27,7 +27,7 @@ struct kmem_cache {
27 unsigned int limit; 27 unsigned int limit;
28 unsigned int shared; 28 unsigned int shared;
29 29
30 unsigned int buffer_size; 30 unsigned int size;
31 u32 reciprocal_buffer_size; 31 u32 reciprocal_buffer_size;
32/* 2) touched by every alloc & free from the backend */ 32/* 2) touched by every alloc & free from the backend */
33 33
@@ -39,7 +39,7 @@ struct kmem_cache {
39 unsigned int gfporder; 39 unsigned int gfporder;
40 40
41 /* force GFP flags, e.g. GFP_DMA */ 41 /* force GFP flags, e.g. GFP_DMA */
42 gfp_t gfpflags; 42 gfp_t allocflags;
43 43
44 size_t colour; /* cache colouring range */ 44 size_t colour; /* cache colouring range */
45 unsigned int colour_off; /* colour offset */ 45 unsigned int colour_off; /* colour offset */
@@ -52,7 +52,10 @@ struct kmem_cache {
52 52
53/* 4) cache creation/removal */ 53/* 4) cache creation/removal */
54 const char *name; 54 const char *name;
55 struct list_head next; 55 struct list_head list;
56 int refcount;
57 int object_size;
58 int align;
56 59
57/* 5) statistics */ 60/* 5) statistics */
58#ifdef CONFIG_DEBUG_SLAB 61#ifdef CONFIG_DEBUG_SLAB
@@ -73,12 +76,11 @@ struct kmem_cache {
73 76
74 /* 77 /*
75 * If debugging is enabled, then the allocator can add additional 78 * If debugging is enabled, then the allocator can add additional
76 * fields and/or padding to every object. buffer_size contains the total 79 * fields and/or padding to every object. size contains the total
77 * object size including these internal fields, the following two 80 * object size including these internal fields, the following two
78 * variables contain the offset to the user object and its size. 81 * variables contain the offset to the user object and its size.
79 */ 82 */
80 int obj_offset; 83 int obj_offset;
81 int obj_size;
82#endif /* CONFIG_DEBUG_SLAB */ 84#endif /* CONFIG_DEBUG_SLAB */
83 85
84/* 6) per-cpu/per-node data, touched during every alloc/free */ 86/* 6) per-cpu/per-node data, touched during every alloc/free */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index c2f8c8bc56ed..df448adb7283 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -48,7 +48,6 @@ struct kmem_cache_cpu {
48 unsigned long tid; /* Globally unique transaction id */ 48 unsigned long tid; /* Globally unique transaction id */
49 struct page *page; /* The slab from which we are allocating */ 49 struct page *page; /* The slab from which we are allocating */
50 struct page *partial; /* Partially allocated frozen slabs */ 50 struct page *partial; /* Partially allocated frozen slabs */
51 int node; /* The node of the page (or -1 for debug) */
52#ifdef CONFIG_SLUB_STATS 51#ifdef CONFIG_SLUB_STATS
53 unsigned stat[NR_SLUB_STAT_ITEMS]; 52 unsigned stat[NR_SLUB_STAT_ITEMS];
54#endif 53#endif
@@ -83,7 +82,7 @@ struct kmem_cache {
83 unsigned long flags; 82 unsigned long flags;
84 unsigned long min_partial; 83 unsigned long min_partial;
85 int size; /* The size of an object including meta data */ 84 int size; /* The size of an object including meta data */
86 int objsize; /* The size of an object without meta data */ 85 int object_size; /* The size of an object without meta data */
87 int offset; /* Free pointer offset. */ 86 int offset; /* Free pointer offset. */
88 int cpu_partial; /* Number of per cpu partial objects to keep around */ 87 int cpu_partial; /* Number of per cpu partial objects to keep around */
89 struct kmem_cache_order_objects oo; 88 struct kmem_cache_order_objects oo;
diff --git a/mm/Makefile b/mm/Makefile
index 2e2fbbefb99f..8e81fe263c94 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -16,7 +16,8 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
16 readahead.o swap.o truncate.o vmscan.o shmem.o \ 16 readahead.o swap.o truncate.o vmscan.o shmem.o \
17 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ 17 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
18 page_isolation.o mm_init.o mmu_context.o percpu.o \ 18 page_isolation.o mm_init.o mmu_context.o percpu.o \
19 compaction.o $(mmu-y) 19 compaction.o slab_common.o $(mmu-y)
20
20obj-y += init-mm.o 21obj-y += init-mm.o
21 22
22ifdef CONFIG_NO_BOOTMEM 23ifdef CONFIG_NO_BOOTMEM
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1d771e4200d2..bd92431d4c49 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1602,8 +1602,14 @@ static unsigned interleave_nodes(struct mempolicy *policy)
1602 * task can change it's policy. The system default policy requires no 1602 * task can change it's policy. The system default policy requires no
1603 * such protection. 1603 * such protection.
1604 */ 1604 */
1605unsigned slab_node(struct mempolicy *policy) 1605unsigned slab_node(void)
1606{ 1606{
1607 struct mempolicy *policy;
1608
1609 if (in_interrupt())
1610 return numa_node_id();
1611
1612 policy = current->mempolicy;
1607 if (!policy || policy->flags & MPOL_F_LOCAL) 1613 if (!policy || policy->flags & MPOL_F_LOCAL)
1608 return numa_node_id(); 1614 return numa_node_id();
1609 1615
diff --git a/mm/slab.c b/mm/slab.c
index e901a36e2520..1fcf3ac94b6c 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -68,7 +68,7 @@
68 * Further notes from the original documentation: 68 * Further notes from the original documentation:
69 * 69 *
70 * 11 April '97. Started multi-threading - markhe 70 * 11 April '97. Started multi-threading - markhe
71 * The global cache-chain is protected by the mutex 'cache_chain_mutex'. 71 * The global cache-chain is protected by the mutex 'slab_mutex'.
72 * The sem is only needed when accessing/extending the cache-chain, which 72 * The sem is only needed when accessing/extending the cache-chain, which
73 * can never happen inside an interrupt (kmem_cache_create(), 73 * can never happen inside an interrupt (kmem_cache_create(),
74 * kmem_cache_shrink() and kmem_cache_reap()). 74 * kmem_cache_shrink() and kmem_cache_reap()).
@@ -87,6 +87,7 @@
87 */ 87 */
88 88
89#include <linux/slab.h> 89#include <linux/slab.h>
90#include "slab.h"
90#include <linux/mm.h> 91#include <linux/mm.h>
91#include <linux/poison.h> 92#include <linux/poison.h>
92#include <linux/swap.h> 93#include <linux/swap.h>
@@ -424,8 +425,8 @@ static void kmem_list3_init(struct kmem_list3 *parent)
424 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1: 425 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
425 * redzone word. 426 * redzone word.
426 * cachep->obj_offset: The real object. 427 * cachep->obj_offset: The real object.
427 * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] 428 * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
428 * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address 429 * cachep->size - 1* BYTES_PER_WORD: last caller address
429 * [BYTES_PER_WORD long] 430 * [BYTES_PER_WORD long]
430 */ 431 */
431static int obj_offset(struct kmem_cache *cachep) 432static int obj_offset(struct kmem_cache *cachep)
@@ -433,11 +434,6 @@ static int obj_offset(struct kmem_cache *cachep)
433 return cachep->obj_offset; 434 return cachep->obj_offset;
434} 435}
435 436
436static int obj_size(struct kmem_cache *cachep)
437{
438 return cachep->obj_size;
439}
440
441static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp) 437static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
442{ 438{
443 BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); 439 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
@@ -449,23 +445,22 @@ static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
449{ 445{
450 BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); 446 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
451 if (cachep->flags & SLAB_STORE_USER) 447 if (cachep->flags & SLAB_STORE_USER)
452 return (unsigned long long *)(objp + cachep->buffer_size - 448 return (unsigned long long *)(objp + cachep->size -
453 sizeof(unsigned long long) - 449 sizeof(unsigned long long) -
454 REDZONE_ALIGN); 450 REDZONE_ALIGN);
455 return (unsigned long long *) (objp + cachep->buffer_size - 451 return (unsigned long long *) (objp + cachep->size -
456 sizeof(unsigned long long)); 452 sizeof(unsigned long long));
457} 453}
458 454
459static void **dbg_userword(struct kmem_cache *cachep, void *objp) 455static void **dbg_userword(struct kmem_cache *cachep, void *objp)
460{ 456{
461 BUG_ON(!(cachep->flags & SLAB_STORE_USER)); 457 BUG_ON(!(cachep->flags & SLAB_STORE_USER));
462 return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD); 458 return (void **)(objp + cachep->size - BYTES_PER_WORD);
463} 459}
464 460
465#else 461#else
466 462
467#define obj_offset(x) 0 463#define obj_offset(x) 0
468#define obj_size(cachep) (cachep->buffer_size)
469#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) 464#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
470#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) 465#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
471#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;}) 466#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
@@ -475,7 +470,7 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
475#ifdef CONFIG_TRACING 470#ifdef CONFIG_TRACING
476size_t slab_buffer_size(struct kmem_cache *cachep) 471size_t slab_buffer_size(struct kmem_cache *cachep)
477{ 472{
478 return cachep->buffer_size; 473 return cachep->size;
479} 474}
480EXPORT_SYMBOL(slab_buffer_size); 475EXPORT_SYMBOL(slab_buffer_size);
481#endif 476#endif
@@ -489,56 +484,37 @@ EXPORT_SYMBOL(slab_buffer_size);
489static int slab_max_order = SLAB_MAX_ORDER_LO; 484static int slab_max_order = SLAB_MAX_ORDER_LO;
490static bool slab_max_order_set __initdata; 485static bool slab_max_order_set __initdata;
491 486
492/*
493 * Functions for storing/retrieving the cachep and or slab from the page
494 * allocator. These are used to find the slab an obj belongs to. With kfree(),
495 * these are used to find the cache which an obj belongs to.
496 */
497static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
498{
499 page->lru.next = (struct list_head *)cache;
500}
501
502static inline struct kmem_cache *page_get_cache(struct page *page) 487static inline struct kmem_cache *page_get_cache(struct page *page)
503{ 488{
504 page = compound_head(page); 489 page = compound_head(page);
505 BUG_ON(!PageSlab(page)); 490 BUG_ON(!PageSlab(page));
506 return (struct kmem_cache *)page->lru.next; 491 return page->slab_cache;
507}
508
509static inline void page_set_slab(struct page *page, struct slab *slab)
510{
511 page->lru.prev = (struct list_head *)slab;
512}
513
514static inline struct slab *page_get_slab(struct page *page)
515{
516 BUG_ON(!PageSlab(page));
517 return (struct slab *)page->lru.prev;
518} 492}
519 493
520static inline struct kmem_cache *virt_to_cache(const void *obj) 494static inline struct kmem_cache *virt_to_cache(const void *obj)
521{ 495{
522 struct page *page = virt_to_head_page(obj); 496 struct page *page = virt_to_head_page(obj);
523 return page_get_cache(page); 497 return page->slab_cache;
524} 498}
525 499
526static inline struct slab *virt_to_slab(const void *obj) 500static inline struct slab *virt_to_slab(const void *obj)
527{ 501{
528 struct page *page = virt_to_head_page(obj); 502 struct page *page = virt_to_head_page(obj);
529 return page_get_slab(page); 503
504 VM_BUG_ON(!PageSlab(page));
505 return page->slab_page;
530} 506}
531 507
532static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, 508static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
533 unsigned int idx) 509 unsigned int idx)
534{ 510{
535 return slab->s_mem + cache->buffer_size * idx; 511 return slab->s_mem + cache->size * idx;
536} 512}
537 513
538/* 514/*
539 * We want to avoid an expensive divide : (offset / cache->buffer_size) 515 * We want to avoid an expensive divide : (offset / cache->size)
540 * Using the fact that buffer_size is a constant for a particular cache, 516 * Using the fact that size is a constant for a particular cache,
541 * we can replace (offset / cache->buffer_size) by 517 * we can replace (offset / cache->size) by
542 * reciprocal_divide(offset, cache->reciprocal_buffer_size) 518 * reciprocal_divide(offset, cache->reciprocal_buffer_size)
543 */ 519 */
544static inline unsigned int obj_to_index(const struct kmem_cache *cache, 520static inline unsigned int obj_to_index(const struct kmem_cache *cache,
@@ -584,33 +560,12 @@ static struct kmem_cache cache_cache = {
584 .batchcount = 1, 560 .batchcount = 1,
585 .limit = BOOT_CPUCACHE_ENTRIES, 561 .limit = BOOT_CPUCACHE_ENTRIES,
586 .shared = 1, 562 .shared = 1,
587 .buffer_size = sizeof(struct kmem_cache), 563 .size = sizeof(struct kmem_cache),
588 .name = "kmem_cache", 564 .name = "kmem_cache",
589}; 565};
590 566
591#define BAD_ALIEN_MAGIC 0x01020304ul 567#define BAD_ALIEN_MAGIC 0x01020304ul
592 568
593/*
594 * chicken and egg problem: delay the per-cpu array allocation
595 * until the general caches are up.
596 */
597static enum {
598 NONE,
599 PARTIAL_AC,
600 PARTIAL_L3,
601 EARLY,
602 LATE,
603 FULL
604} g_cpucache_up;
605
606/*
607 * used by boot code to determine if it can use slab based allocator
608 */
609int slab_is_available(void)
610{
611 return g_cpucache_up >= EARLY;
612}
613
614#ifdef CONFIG_LOCKDEP 569#ifdef CONFIG_LOCKDEP
615 570
616/* 571/*
@@ -676,7 +631,7 @@ static void init_node_lock_keys(int q)
676{ 631{
677 struct cache_sizes *s = malloc_sizes; 632 struct cache_sizes *s = malloc_sizes;
678 633
679 if (g_cpucache_up < LATE) 634 if (slab_state < UP)
680 return; 635 return;
681 636
682 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { 637 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
@@ -716,12 +671,6 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
716} 671}
717#endif 672#endif
718 673
719/*
720 * Guard access to the cache-chain.
721 */
722static DEFINE_MUTEX(cache_chain_mutex);
723static struct list_head cache_chain;
724
725static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); 674static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
726 675
727static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) 676static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -1145,7 +1094,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1145 * When hotplugging memory or a cpu, existing nodelists are not replaced if 1094 * When hotplugging memory or a cpu, existing nodelists are not replaced if
1146 * already in use. 1095 * already in use.
1147 * 1096 *
1148 * Must hold cache_chain_mutex. 1097 * Must hold slab_mutex.
1149 */ 1098 */
1150static int init_cache_nodelists_node(int node) 1099static int init_cache_nodelists_node(int node)
1151{ 1100{
@@ -1153,7 +1102,7 @@ static int init_cache_nodelists_node(int node)
1153 struct kmem_list3 *l3; 1102 struct kmem_list3 *l3;
1154 const int memsize = sizeof(struct kmem_list3); 1103 const int memsize = sizeof(struct kmem_list3);
1155 1104
1156 list_for_each_entry(cachep, &cache_chain, next) { 1105 list_for_each_entry(cachep, &slab_caches, list) {
1157 /* 1106 /*
1158 * Set up the size64 kmemlist for cpu before we can 1107 * Set up the size64 kmemlist for cpu before we can
1159 * begin anything. Make sure some other cpu on this 1108 * begin anything. Make sure some other cpu on this
@@ -1169,7 +1118,7 @@ static int init_cache_nodelists_node(int node)
1169 1118
1170 /* 1119 /*
1171 * The l3s don't come and go as CPUs come and 1120 * The l3s don't come and go as CPUs come and
1172 * go. cache_chain_mutex is sufficient 1121 * go. slab_mutex is sufficient
1173 * protection here. 1122 * protection here.
1174 */ 1123 */
1175 cachep->nodelists[node] = l3; 1124 cachep->nodelists[node] = l3;
@@ -1191,7 +1140,7 @@ static void __cpuinit cpuup_canceled(long cpu)
1191 int node = cpu_to_mem(cpu); 1140 int node = cpu_to_mem(cpu);
1192 const struct cpumask *mask = cpumask_of_node(node); 1141 const struct cpumask *mask = cpumask_of_node(node);
1193 1142
1194 list_for_each_entry(cachep, &cache_chain, next) { 1143 list_for_each_entry(cachep, &slab_caches, list) {
1195 struct array_cache *nc; 1144 struct array_cache *nc;
1196 struct array_cache *shared; 1145 struct array_cache *shared;
1197 struct array_cache **alien; 1146 struct array_cache **alien;
@@ -1241,7 +1190,7 @@ free_array_cache:
1241 * the respective cache's slabs, now we can go ahead and 1190 * the respective cache's slabs, now we can go ahead and
1242 * shrink each nodelist to its limit. 1191 * shrink each nodelist to its limit.
1243 */ 1192 */
1244 list_for_each_entry(cachep, &cache_chain, next) { 1193 list_for_each_entry(cachep, &slab_caches, list) {
1245 l3 = cachep->nodelists[node]; 1194 l3 = cachep->nodelists[node];
1246 if (!l3) 1195 if (!l3)
1247 continue; 1196 continue;
@@ -1270,7 +1219,7 @@ static int __cpuinit cpuup_prepare(long cpu)
1270 * Now we can go ahead with allocating the shared arrays and 1219 * Now we can go ahead with allocating the shared arrays and
1271 * array caches 1220 * array caches
1272 */ 1221 */
1273 list_for_each_entry(cachep, &cache_chain, next) { 1222 list_for_each_entry(cachep, &slab_caches, list) {
1274 struct array_cache *nc; 1223 struct array_cache *nc;
1275 struct array_cache *shared = NULL; 1224 struct array_cache *shared = NULL;
1276 struct array_cache **alien = NULL; 1225 struct array_cache **alien = NULL;
@@ -1338,9 +1287,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1338 switch (action) { 1287 switch (action) {
1339 case CPU_UP_PREPARE: 1288 case CPU_UP_PREPARE:
1340 case CPU_UP_PREPARE_FROZEN: 1289 case CPU_UP_PREPARE_FROZEN:
1341 mutex_lock(&cache_chain_mutex); 1290 mutex_lock(&slab_mutex);
1342 err = cpuup_prepare(cpu); 1291 err = cpuup_prepare(cpu);
1343 mutex_unlock(&cache_chain_mutex); 1292 mutex_unlock(&slab_mutex);
1344 break; 1293 break;
1345 case CPU_ONLINE: 1294 case CPU_ONLINE:
1346 case CPU_ONLINE_FROZEN: 1295 case CPU_ONLINE_FROZEN:
@@ -1350,7 +1299,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1350 case CPU_DOWN_PREPARE: 1299 case CPU_DOWN_PREPARE:
1351 case CPU_DOWN_PREPARE_FROZEN: 1300 case CPU_DOWN_PREPARE_FROZEN:
1352 /* 1301 /*
1353 * Shutdown cache reaper. Note that the cache_chain_mutex is 1302 * Shutdown cache reaper. Note that the slab_mutex is
1354 * held so that if cache_reap() is invoked it cannot do 1303 * held so that if cache_reap() is invoked it cannot do
1355 * anything expensive but will only modify reap_work 1304 * anything expensive but will only modify reap_work
1356 * and reschedule the timer. 1305 * and reschedule the timer.
@@ -1377,9 +1326,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1377#endif 1326#endif
1378 case CPU_UP_CANCELED: 1327 case CPU_UP_CANCELED:
1379 case CPU_UP_CANCELED_FROZEN: 1328 case CPU_UP_CANCELED_FROZEN:
1380 mutex_lock(&cache_chain_mutex); 1329 mutex_lock(&slab_mutex);
1381 cpuup_canceled(cpu); 1330 cpuup_canceled(cpu);
1382 mutex_unlock(&cache_chain_mutex); 1331 mutex_unlock(&slab_mutex);
1383 break; 1332 break;
1384 } 1333 }
1385 return notifier_from_errno(err); 1334 return notifier_from_errno(err);
@@ -1395,14 +1344,14 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
1395 * Returns -EBUSY if all objects cannot be drained so that the node is not 1344 * Returns -EBUSY if all objects cannot be drained so that the node is not
1396 * removed. 1345 * removed.
1397 * 1346 *
1398 * Must hold cache_chain_mutex. 1347 * Must hold slab_mutex.
1399 */ 1348 */
1400static int __meminit drain_cache_nodelists_node(int node) 1349static int __meminit drain_cache_nodelists_node(int node)
1401{ 1350{
1402 struct kmem_cache *cachep; 1351 struct kmem_cache *cachep;
1403 int ret = 0; 1352 int ret = 0;
1404 1353
1405 list_for_each_entry(cachep, &cache_chain, next) { 1354 list_for_each_entry(cachep, &slab_caches, list) {
1406 struct kmem_list3 *l3; 1355 struct kmem_list3 *l3;
1407 1356
1408 l3 = cachep->nodelists[node]; 1357 l3 = cachep->nodelists[node];
@@ -1433,14 +1382,14 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
1433 1382
1434 switch (action) { 1383 switch (action) {
1435 case MEM_GOING_ONLINE: 1384 case MEM_GOING_ONLINE:
1436 mutex_lock(&cache_chain_mutex); 1385 mutex_lock(&slab_mutex);
1437 ret = init_cache_nodelists_node(nid); 1386 ret = init_cache_nodelists_node(nid);
1438 mutex_unlock(&cache_chain_mutex); 1387 mutex_unlock(&slab_mutex);
1439 break; 1388 break;
1440 case MEM_GOING_OFFLINE: 1389 case MEM_GOING_OFFLINE:
1441 mutex_lock(&cache_chain_mutex); 1390 mutex_lock(&slab_mutex);
1442 ret = drain_cache_nodelists_node(nid); 1391 ret = drain_cache_nodelists_node(nid);
1443 mutex_unlock(&cache_chain_mutex); 1392 mutex_unlock(&slab_mutex);
1444 break; 1393 break;
1445 case MEM_ONLINE: 1394 case MEM_ONLINE:
1446 case MEM_OFFLINE: 1395 case MEM_OFFLINE:
@@ -1544,8 +1493,8 @@ void __init kmem_cache_init(void)
1544 node = numa_mem_id(); 1493 node = numa_mem_id();
1545 1494
1546 /* 1) create the cache_cache */ 1495 /* 1) create the cache_cache */
1547 INIT_LIST_HEAD(&cache_chain); 1496 INIT_LIST_HEAD(&slab_caches);
1548 list_add(&cache_cache.next, &cache_chain); 1497 list_add(&cache_cache.list, &slab_caches);
1549 cache_cache.colour_off = cache_line_size(); 1498 cache_cache.colour_off = cache_line_size();
1550 cache_cache.array[smp_processor_id()] = &initarray_cache.cache; 1499 cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1551 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; 1500 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
@@ -1553,18 +1502,16 @@ void __init kmem_cache_init(void)
1553 /* 1502 /*
1554 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids 1503 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
1555 */ 1504 */
1556 cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + 1505 cache_cache.size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
1557 nr_node_ids * sizeof(struct kmem_list3 *); 1506 nr_node_ids * sizeof(struct kmem_list3 *);
1558#if DEBUG 1507 cache_cache.object_size = cache_cache.size;
1559 cache_cache.obj_size = cache_cache.buffer_size; 1508 cache_cache.size = ALIGN(cache_cache.size,
1560#endif
1561 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1562 cache_line_size()); 1509 cache_line_size());
1563 cache_cache.reciprocal_buffer_size = 1510 cache_cache.reciprocal_buffer_size =
1564 reciprocal_value(cache_cache.buffer_size); 1511 reciprocal_value(cache_cache.size);
1565 1512
1566 for (order = 0; order < MAX_ORDER; order++) { 1513 for (order = 0; order < MAX_ORDER; order++) {
1567 cache_estimate(order, cache_cache.buffer_size, 1514 cache_estimate(order, cache_cache.size,
1568 cache_line_size(), 0, &left_over, &cache_cache.num); 1515 cache_line_size(), 0, &left_over, &cache_cache.num);
1569 if (cache_cache.num) 1516 if (cache_cache.num)
1570 break; 1517 break;
@@ -1585,7 +1532,7 @@ void __init kmem_cache_init(void)
1585 * bug. 1532 * bug.
1586 */ 1533 */
1587 1534
1588 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, 1535 sizes[INDEX_AC].cs_cachep = __kmem_cache_create(names[INDEX_AC].name,
1589 sizes[INDEX_AC].cs_size, 1536 sizes[INDEX_AC].cs_size,
1590 ARCH_KMALLOC_MINALIGN, 1537 ARCH_KMALLOC_MINALIGN,
1591 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1538 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
@@ -1593,7 +1540,7 @@ void __init kmem_cache_init(void)
1593 1540
1594 if (INDEX_AC != INDEX_L3) { 1541 if (INDEX_AC != INDEX_L3) {
1595 sizes[INDEX_L3].cs_cachep = 1542 sizes[INDEX_L3].cs_cachep =
1596 kmem_cache_create(names[INDEX_L3].name, 1543 __kmem_cache_create(names[INDEX_L3].name,
1597 sizes[INDEX_L3].cs_size, 1544 sizes[INDEX_L3].cs_size,
1598 ARCH_KMALLOC_MINALIGN, 1545 ARCH_KMALLOC_MINALIGN,
1599 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1546 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
@@ -1611,14 +1558,14 @@ void __init kmem_cache_init(void)
1611 * allow tighter packing of the smaller caches. 1558 * allow tighter packing of the smaller caches.
1612 */ 1559 */
1613 if (!sizes->cs_cachep) { 1560 if (!sizes->cs_cachep) {
1614 sizes->cs_cachep = kmem_cache_create(names->name, 1561 sizes->cs_cachep = __kmem_cache_create(names->name,
1615 sizes->cs_size, 1562 sizes->cs_size,
1616 ARCH_KMALLOC_MINALIGN, 1563 ARCH_KMALLOC_MINALIGN,
1617 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1564 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1618 NULL); 1565 NULL);
1619 } 1566 }
1620#ifdef CONFIG_ZONE_DMA 1567#ifdef CONFIG_ZONE_DMA
1621 sizes->cs_dmacachep = kmem_cache_create( 1568 sizes->cs_dmacachep = __kmem_cache_create(
1622 names->name_dma, 1569 names->name_dma,
1623 sizes->cs_size, 1570 sizes->cs_size,
1624 ARCH_KMALLOC_MINALIGN, 1571 ARCH_KMALLOC_MINALIGN,
@@ -1676,27 +1623,27 @@ void __init kmem_cache_init(void)
1676 } 1623 }
1677 } 1624 }
1678 1625
1679 g_cpucache_up = EARLY; 1626 slab_state = UP;
1680} 1627}
1681 1628
1682void __init kmem_cache_init_late(void) 1629void __init kmem_cache_init_late(void)
1683{ 1630{
1684 struct kmem_cache *cachep; 1631 struct kmem_cache *cachep;
1685 1632
1686 g_cpucache_up = LATE; 1633 slab_state = UP;
1687 1634
1688 /* Annotate slab for lockdep -- annotate the malloc caches */ 1635 /* Annotate slab for lockdep -- annotate the malloc caches */
1689 init_lock_keys(); 1636 init_lock_keys();
1690 1637
1691 /* 6) resize the head arrays to their final sizes */ 1638 /* 6) resize the head arrays to their final sizes */
1692 mutex_lock(&cache_chain_mutex); 1639 mutex_lock(&slab_mutex);
1693 list_for_each_entry(cachep, &cache_chain, next) 1640 list_for_each_entry(cachep, &slab_caches, list)
1694 if (enable_cpucache(cachep, GFP_NOWAIT)) 1641 if (enable_cpucache(cachep, GFP_NOWAIT))
1695 BUG(); 1642 BUG();
1696 mutex_unlock(&cache_chain_mutex); 1643 mutex_unlock(&slab_mutex);
1697 1644
1698 /* Done! */ 1645 /* Done! */
1699 g_cpucache_up = FULL; 1646 slab_state = FULL;
1700 1647
1701 /* 1648 /*
1702 * Register a cpu startup notifier callback that initializes 1649 * Register a cpu startup notifier callback that initializes
@@ -1727,6 +1674,9 @@ static int __init cpucache_init(void)
1727 */ 1674 */
1728 for_each_online_cpu(cpu) 1675 for_each_online_cpu(cpu)
1729 start_cpu_timer(cpu); 1676 start_cpu_timer(cpu);
1677
1678 /* Done! */
1679 slab_state = FULL;
1730 return 0; 1680 return 0;
1731} 1681}
1732__initcall(cpucache_init); 1682__initcall(cpucache_init);
@@ -1743,7 +1693,7 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1743 "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n", 1693 "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n",
1744 nodeid, gfpflags); 1694 nodeid, gfpflags);
1745 printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n", 1695 printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n",
1746 cachep->name, cachep->buffer_size, cachep->gfporder); 1696 cachep->name, cachep->size, cachep->gfporder);
1747 1697
1748 for_each_online_node(node) { 1698 for_each_online_node(node) {
1749 unsigned long active_objs = 0, num_objs = 0, free_objects = 0; 1699 unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
@@ -1798,7 +1748,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1798 flags |= __GFP_COMP; 1748 flags |= __GFP_COMP;
1799#endif 1749#endif
1800 1750
1801 flags |= cachep->gfpflags; 1751 flags |= cachep->allocflags;
1802 if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 1752 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1803 flags |= __GFP_RECLAIMABLE; 1753 flags |= __GFP_RECLAIMABLE;
1804 1754
@@ -1874,7 +1824,7 @@ static void kmem_rcu_free(struct rcu_head *head)
1874static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, 1824static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
1875 unsigned long caller) 1825 unsigned long caller)
1876{ 1826{
1877 int size = obj_size(cachep); 1827 int size = cachep->object_size;
1878 1828
1879 addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; 1829 addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
1880 1830
@@ -1906,7 +1856,7 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
1906 1856
1907static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) 1857static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1908{ 1858{
1909 int size = obj_size(cachep); 1859 int size = cachep->object_size;
1910 addr = &((char *)addr)[obj_offset(cachep)]; 1860 addr = &((char *)addr)[obj_offset(cachep)];
1911 1861
1912 memset(addr, val, size); 1862 memset(addr, val, size);
@@ -1966,7 +1916,7 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1966 printk("\n"); 1916 printk("\n");
1967 } 1917 }
1968 realobj = (char *)objp + obj_offset(cachep); 1918 realobj = (char *)objp + obj_offset(cachep);
1969 size = obj_size(cachep); 1919 size = cachep->object_size;
1970 for (i = 0; i < size && lines; i += 16, lines--) { 1920 for (i = 0; i < size && lines; i += 16, lines--) {
1971 int limit; 1921 int limit;
1972 limit = 16; 1922 limit = 16;
@@ -1983,7 +1933,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1983 int lines = 0; 1933 int lines = 0;
1984 1934
1985 realobj = (char *)objp + obj_offset(cachep); 1935 realobj = (char *)objp + obj_offset(cachep);
1986 size = obj_size(cachep); 1936 size = cachep->object_size;
1987 1937
1988 for (i = 0; i < size; i++) { 1938 for (i = 0; i < size; i++) {
1989 char exp = POISON_FREE; 1939 char exp = POISON_FREE;
@@ -2047,10 +1997,10 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
2047 1997
2048 if (cachep->flags & SLAB_POISON) { 1998 if (cachep->flags & SLAB_POISON) {
2049#ifdef CONFIG_DEBUG_PAGEALLOC 1999#ifdef CONFIG_DEBUG_PAGEALLOC
2050 if (cachep->buffer_size % PAGE_SIZE == 0 && 2000 if (cachep->size % PAGE_SIZE == 0 &&
2051 OFF_SLAB(cachep)) 2001 OFF_SLAB(cachep))
2052 kernel_map_pages(virt_to_page(objp), 2002 kernel_map_pages(virt_to_page(objp),
2053 cachep->buffer_size / PAGE_SIZE, 1); 2003 cachep->size / PAGE_SIZE, 1);
2054 else 2004 else
2055 check_poison_obj(cachep, objp); 2005 check_poison_obj(cachep, objp);
2056#else 2006#else
@@ -2194,10 +2144,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
2194 2144
2195static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) 2145static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2196{ 2146{
2197 if (g_cpucache_up == FULL) 2147 if (slab_state >= FULL)
2198 return enable_cpucache(cachep, gfp); 2148 return enable_cpucache(cachep, gfp);
2199 2149
2200 if (g_cpucache_up == NONE) { 2150 if (slab_state == DOWN) {
2201 /* 2151 /*
2202 * Note: the first kmem_cache_create must create the cache 2152 * Note: the first kmem_cache_create must create the cache
2203 * that's used by kmalloc(24), otherwise the creation of 2153 * that's used by kmalloc(24), otherwise the creation of
@@ -2212,16 +2162,16 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2212 */ 2162 */
2213 set_up_list3s(cachep, SIZE_AC); 2163 set_up_list3s(cachep, SIZE_AC);
2214 if (INDEX_AC == INDEX_L3) 2164 if (INDEX_AC == INDEX_L3)
2215 g_cpucache_up = PARTIAL_L3; 2165 slab_state = PARTIAL_L3;
2216 else 2166 else
2217 g_cpucache_up = PARTIAL_AC; 2167 slab_state = PARTIAL_ARRAYCACHE;
2218 } else { 2168 } else {
2219 cachep->array[smp_processor_id()] = 2169 cachep->array[smp_processor_id()] =
2220 kmalloc(sizeof(struct arraycache_init), gfp); 2170 kmalloc(sizeof(struct arraycache_init), gfp);
2221 2171
2222 if (g_cpucache_up == PARTIAL_AC) { 2172 if (slab_state == PARTIAL_ARRAYCACHE) {
2223 set_up_list3s(cachep, SIZE_L3); 2173 set_up_list3s(cachep, SIZE_L3);
2224 g_cpucache_up = PARTIAL_L3; 2174 slab_state = PARTIAL_L3;
2225 } else { 2175 } else {
2226 int node; 2176 int node;
2227 for_each_online_node(node) { 2177 for_each_online_node(node) {
@@ -2247,7 +2197,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2247} 2197}
2248 2198
2249/** 2199/**
2250 * kmem_cache_create - Create a cache. 2200 * __kmem_cache_create - Create a cache.
2251 * @name: A string which is used in /proc/slabinfo to identify this cache. 2201 * @name: A string which is used in /proc/slabinfo to identify this cache.
2252 * @size: The size of objects to be created in this cache. 2202 * @size: The size of objects to be created in this cache.
2253 * @align: The required alignment for the objects. 2203 * @align: The required alignment for the objects.
@@ -2274,59 +2224,14 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2274 * as davem. 2224 * as davem.
2275 */ 2225 */
2276struct kmem_cache * 2226struct kmem_cache *
2277kmem_cache_create (const char *name, size_t size, size_t align, 2227__kmem_cache_create (const char *name, size_t size, size_t align,
2278 unsigned long flags, void (*ctor)(void *)) 2228 unsigned long flags, void (*ctor)(void *))
2279{ 2229{
2280 size_t left_over, slab_size, ralign; 2230 size_t left_over, slab_size, ralign;
2281 struct kmem_cache *cachep = NULL, *pc; 2231 struct kmem_cache *cachep = NULL;
2282 gfp_t gfp; 2232 gfp_t gfp;
2283 2233
2284 /*
2285 * Sanity checks... these are all serious usage bugs.
2286 */
2287 if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
2288 size > KMALLOC_MAX_SIZE) {
2289 printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
2290 name);
2291 BUG();
2292 }
2293
2294 /*
2295 * We use cache_chain_mutex to ensure a consistent view of
2296 * cpu_online_mask as well. Please see cpuup_callback
2297 */
2298 if (slab_is_available()) {
2299 get_online_cpus();
2300 mutex_lock(&cache_chain_mutex);
2301 }
2302
2303 list_for_each_entry(pc, &cache_chain, next) {
2304 char tmp;
2305 int res;
2306
2307 /*
2308 * This happens when the module gets unloaded and doesn't
2309 * destroy its slab cache and no-one else reuses the vmalloc
2310 * area of the module. Print a warning.
2311 */
2312 res = probe_kernel_address(pc->name, tmp);
2313 if (res) {
2314 printk(KERN_ERR
2315 "SLAB: cache with size %d has lost its name\n",
2316 pc->buffer_size);
2317 continue;
2318 }
2319
2320 if (!strcmp(pc->name, name)) {
2321 printk(KERN_ERR
2322 "kmem_cache_create: duplicate cache %s\n", name);
2323 dump_stack();
2324 goto oops;
2325 }
2326 }
2327
2328#if DEBUG 2234#if DEBUG
2329 WARN_ON(strchr(name, ' ')); /* It confuses parsers */
2330#if FORCED_DEBUG 2235#if FORCED_DEBUG
2331 /* 2236 /*
2332 * Enable redzoning and last user accounting, except for caches with 2237 * Enable redzoning and last user accounting, except for caches with
@@ -2415,11 +2320,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2415 /* Get cache's description obj. */ 2320 /* Get cache's description obj. */
2416 cachep = kmem_cache_zalloc(&cache_cache, gfp); 2321 cachep = kmem_cache_zalloc(&cache_cache, gfp);
2417 if (!cachep) 2322 if (!cachep)
2418 goto oops; 2323 return NULL;
2419 2324
2420 cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; 2325 cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
2326 cachep->object_size = size;
2327 cachep->align = align;
2421#if DEBUG 2328#if DEBUG
2422 cachep->obj_size = size;
2423 2329
2424 /* 2330 /*
2425 * Both debugging options require word-alignment which is calculated 2331 * Both debugging options require word-alignment which is calculated
@@ -2442,7 +2348,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2442 } 2348 }
2443#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) 2349#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
2444 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size 2350 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
2445 && cachep->obj_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) { 2351 && cachep->object_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) {
2446 cachep->obj_offset += PAGE_SIZE - ALIGN(size, align); 2352 cachep->obj_offset += PAGE_SIZE - ALIGN(size, align);
2447 size = PAGE_SIZE; 2353 size = PAGE_SIZE;
2448 } 2354 }
@@ -2471,8 +2377,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2471 printk(KERN_ERR 2377 printk(KERN_ERR
2472 "kmem_cache_create: couldn't create cache %s.\n", name); 2378 "kmem_cache_create: couldn't create cache %s.\n", name);
2473 kmem_cache_free(&cache_cache, cachep); 2379 kmem_cache_free(&cache_cache, cachep);
2474 cachep = NULL; 2380 return NULL;
2475 goto oops;
2476 } 2381 }
2477 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) 2382 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
2478 + sizeof(struct slab), align); 2383 + sizeof(struct slab), align);
@@ -2508,10 +2413,10 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2508 cachep->colour = left_over / cachep->colour_off; 2413 cachep->colour = left_over / cachep->colour_off;
2509 cachep->slab_size = slab_size; 2414 cachep->slab_size = slab_size;
2510 cachep->flags = flags; 2415 cachep->flags = flags;
2511 cachep->gfpflags = 0; 2416 cachep->allocflags = 0;
2512 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) 2417 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2513 cachep->gfpflags |= GFP_DMA; 2418 cachep->allocflags |= GFP_DMA;
2514 cachep->buffer_size = size; 2419 cachep->size = size;
2515 cachep->reciprocal_buffer_size = reciprocal_value(size); 2420 cachep->reciprocal_buffer_size = reciprocal_value(size);
2516 2421
2517 if (flags & CFLGS_OFF_SLAB) { 2422 if (flags & CFLGS_OFF_SLAB) {
@@ -2530,8 +2435,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2530 2435
2531 if (setup_cpu_cache(cachep, gfp)) { 2436 if (setup_cpu_cache(cachep, gfp)) {
2532 __kmem_cache_destroy(cachep); 2437 __kmem_cache_destroy(cachep);
2533 cachep = NULL; 2438 return NULL;
2534 goto oops;
2535 } 2439 }
2536 2440
2537 if (flags & SLAB_DEBUG_OBJECTS) { 2441 if (flags & SLAB_DEBUG_OBJECTS) {
@@ -2545,18 +2449,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2545 } 2449 }
2546 2450
2547 /* cache setup completed, link it into the list */ 2451 /* cache setup completed, link it into the list */
2548 list_add(&cachep->next, &cache_chain); 2452 list_add(&cachep->list, &slab_caches);
2549oops:
2550 if (!cachep && (flags & SLAB_PANIC))
2551 panic("kmem_cache_create(): failed to create slab `%s'\n",
2552 name);
2553 if (slab_is_available()) {
2554 mutex_unlock(&cache_chain_mutex);
2555 put_online_cpus();
2556 }
2557 return cachep; 2453 return cachep;
2558} 2454}
2559EXPORT_SYMBOL(kmem_cache_create);
2560 2455
2561#if DEBUG 2456#if DEBUG
2562static void check_irq_off(void) 2457static void check_irq_off(void)
@@ -2671,7 +2566,7 @@ out:
2671 return nr_freed; 2566 return nr_freed;
2672} 2567}
2673 2568
2674/* Called with cache_chain_mutex held to protect against cpu hotplug */ 2569/* Called with slab_mutex held to protect against cpu hotplug */
2675static int __cache_shrink(struct kmem_cache *cachep) 2570static int __cache_shrink(struct kmem_cache *cachep)
2676{ 2571{
2677 int ret = 0, i = 0; 2572 int ret = 0, i = 0;
@@ -2706,9 +2601,9 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
2706 BUG_ON(!cachep || in_interrupt()); 2601 BUG_ON(!cachep || in_interrupt());
2707 2602
2708 get_online_cpus(); 2603 get_online_cpus();
2709 mutex_lock(&cache_chain_mutex); 2604 mutex_lock(&slab_mutex);
2710 ret = __cache_shrink(cachep); 2605 ret = __cache_shrink(cachep);
2711 mutex_unlock(&cache_chain_mutex); 2606 mutex_unlock(&slab_mutex);
2712 put_online_cpus(); 2607 put_online_cpus();
2713 return ret; 2608 return ret;
2714} 2609}
@@ -2736,15 +2631,15 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
2736 2631
2737 /* Find the cache in the chain of caches. */ 2632 /* Find the cache in the chain of caches. */
2738 get_online_cpus(); 2633 get_online_cpus();
2739 mutex_lock(&cache_chain_mutex); 2634 mutex_lock(&slab_mutex);
2740 /* 2635 /*
2741 * the chain is never empty, cache_cache is never destroyed 2636 * the chain is never empty, cache_cache is never destroyed
2742 */ 2637 */
2743 list_del(&cachep->next); 2638 list_del(&cachep->list);
2744 if (__cache_shrink(cachep)) { 2639 if (__cache_shrink(cachep)) {
2745 slab_error(cachep, "Can't free all objects"); 2640 slab_error(cachep, "Can't free all objects");
2746 list_add(&cachep->next, &cache_chain); 2641 list_add(&cachep->list, &slab_caches);
2747 mutex_unlock(&cache_chain_mutex); 2642 mutex_unlock(&slab_mutex);
2748 put_online_cpus(); 2643 put_online_cpus();
2749 return; 2644 return;
2750 } 2645 }
@@ -2753,7 +2648,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
2753 rcu_barrier(); 2648 rcu_barrier();
2754 2649
2755 __kmem_cache_destroy(cachep); 2650 __kmem_cache_destroy(cachep);
2756 mutex_unlock(&cache_chain_mutex); 2651 mutex_unlock(&slab_mutex);
2757 put_online_cpus(); 2652 put_online_cpus();
2758} 2653}
2759EXPORT_SYMBOL(kmem_cache_destroy); 2654EXPORT_SYMBOL(kmem_cache_destroy);
@@ -2840,10 +2735,10 @@ static void cache_init_objs(struct kmem_cache *cachep,
2840 slab_error(cachep, "constructor overwrote the" 2735 slab_error(cachep, "constructor overwrote the"
2841 " start of an object"); 2736 " start of an object");
2842 } 2737 }
2843 if ((cachep->buffer_size % PAGE_SIZE) == 0 && 2738 if ((cachep->size % PAGE_SIZE) == 0 &&
2844 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) 2739 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2845 kernel_map_pages(virt_to_page(objp), 2740 kernel_map_pages(virt_to_page(objp),
2846 cachep->buffer_size / PAGE_SIZE, 0); 2741 cachep->size / PAGE_SIZE, 0);
2847#else 2742#else
2848 if (cachep->ctor) 2743 if (cachep->ctor)
2849 cachep->ctor(objp); 2744 cachep->ctor(objp);
@@ -2857,9 +2752,9 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2857{ 2752{
2858 if (CONFIG_ZONE_DMA_FLAG) { 2753 if (CONFIG_ZONE_DMA_FLAG) {
2859 if (flags & GFP_DMA) 2754 if (flags & GFP_DMA)
2860 BUG_ON(!(cachep->gfpflags & GFP_DMA)); 2755 BUG_ON(!(cachep->allocflags & GFP_DMA));
2861 else 2756 else
2862 BUG_ON(cachep->gfpflags & GFP_DMA); 2757 BUG_ON(cachep->allocflags & GFP_DMA);
2863 } 2758 }
2864} 2759}
2865 2760
@@ -2918,8 +2813,8 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2918 nr_pages <<= cache->gfporder; 2813 nr_pages <<= cache->gfporder;
2919 2814
2920 do { 2815 do {
2921 page_set_cache(page, cache); 2816 page->slab_cache = cache;
2922 page_set_slab(page, slab); 2817 page->slab_page = slab;
2923 page++; 2818 page++;
2924 } while (--nr_pages); 2819 } while (--nr_pages);
2925} 2820}
@@ -3057,7 +2952,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
3057 kfree_debugcheck(objp); 2952 kfree_debugcheck(objp);
3058 page = virt_to_head_page(objp); 2953 page = virt_to_head_page(objp);
3059 2954
3060 slabp = page_get_slab(page); 2955 slabp = page->slab_page;
3061 2956
3062 if (cachep->flags & SLAB_RED_ZONE) { 2957 if (cachep->flags & SLAB_RED_ZONE) {
3063 verify_redzone_free(cachep, objp); 2958 verify_redzone_free(cachep, objp);
@@ -3077,10 +2972,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
3077#endif 2972#endif
3078 if (cachep->flags & SLAB_POISON) { 2973 if (cachep->flags & SLAB_POISON) {
3079#ifdef CONFIG_DEBUG_PAGEALLOC 2974#ifdef CONFIG_DEBUG_PAGEALLOC
3080 if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { 2975 if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
3081 store_stackinfo(cachep, objp, (unsigned long)caller); 2976 store_stackinfo(cachep, objp, (unsigned long)caller);
3082 kernel_map_pages(virt_to_page(objp), 2977 kernel_map_pages(virt_to_page(objp),
3083 cachep->buffer_size / PAGE_SIZE, 0); 2978 cachep->size / PAGE_SIZE, 0);
3084 } else { 2979 } else {
3085 poison_obj(cachep, objp, POISON_FREE); 2980 poison_obj(cachep, objp, POISON_FREE);
3086 } 2981 }
@@ -3230,9 +3125,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3230 return objp; 3125 return objp;
3231 if (cachep->flags & SLAB_POISON) { 3126 if (cachep->flags & SLAB_POISON) {
3232#ifdef CONFIG_DEBUG_PAGEALLOC 3127#ifdef CONFIG_DEBUG_PAGEALLOC
3233 if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) 3128 if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
3234 kernel_map_pages(virt_to_page(objp), 3129 kernel_map_pages(virt_to_page(objp),
3235 cachep->buffer_size / PAGE_SIZE, 1); 3130 cachep->size / PAGE_SIZE, 1);
3236 else 3131 else
3237 check_poison_obj(cachep, objp); 3132 check_poison_obj(cachep, objp);
3238#else 3133#else
@@ -3261,8 +3156,8 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3261 struct slab *slabp; 3156 struct slab *slabp;
3262 unsigned objnr; 3157 unsigned objnr;
3263 3158
3264 slabp = page_get_slab(virt_to_head_page(objp)); 3159 slabp = virt_to_head_page(objp)->slab_page;
3265 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; 3160 objnr = (unsigned)(objp - slabp->s_mem) / cachep->size;
3266 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; 3161 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
3267 } 3162 }
3268#endif 3163#endif
@@ -3285,7 +3180,7 @@ static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3285 if (cachep == &cache_cache) 3180 if (cachep == &cache_cache)
3286 return false; 3181 return false;
3287 3182
3288 return should_failslab(obj_size(cachep), flags, cachep->flags); 3183 return should_failslab(cachep->object_size, flags, cachep->flags);
3289} 3184}
3290 3185
3291static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) 3186static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
@@ -3336,7 +3231,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3336 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) 3231 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3337 nid_alloc = cpuset_slab_spread_node(); 3232 nid_alloc = cpuset_slab_spread_node();
3338 else if (current->mempolicy) 3233 else if (current->mempolicy)
3339 nid_alloc = slab_node(current->mempolicy); 3234 nid_alloc = slab_node();
3340 if (nid_alloc != nid_here) 3235 if (nid_alloc != nid_here)
3341 return ____cache_alloc_node(cachep, flags, nid_alloc); 3236 return ____cache_alloc_node(cachep, flags, nid_alloc);
3342 return NULL; 3237 return NULL;
@@ -3368,7 +3263,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3368 3263
3369retry_cpuset: 3264retry_cpuset:
3370 cpuset_mems_cookie = get_mems_allowed(); 3265 cpuset_mems_cookie = get_mems_allowed();
3371 zonelist = node_zonelist(slab_node(current->mempolicy), flags); 3266 zonelist = node_zonelist(slab_node(), flags);
3372 3267
3373retry: 3268retry:
3374 /* 3269 /*
@@ -3545,14 +3440,14 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3545 out: 3440 out:
3546 local_irq_restore(save_flags); 3441 local_irq_restore(save_flags);
3547 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); 3442 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3548 kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, 3443 kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
3549 flags); 3444 flags);
3550 3445
3551 if (likely(ptr)) 3446 if (likely(ptr))
3552 kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep)); 3447 kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size);
3553 3448
3554 if (unlikely((flags & __GFP_ZERO) && ptr)) 3449 if (unlikely((flags & __GFP_ZERO) && ptr))
3555 memset(ptr, 0, obj_size(cachep)); 3450 memset(ptr, 0, cachep->object_size);
3556 3451
3557 return ptr; 3452 return ptr;
3558} 3453}
@@ -3607,15 +3502,15 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3607 objp = __do_cache_alloc(cachep, flags); 3502 objp = __do_cache_alloc(cachep, flags);
3608 local_irq_restore(save_flags); 3503 local_irq_restore(save_flags);
3609 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); 3504 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3610 kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, 3505 kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
3611 flags); 3506 flags);
3612 prefetchw(objp); 3507 prefetchw(objp);
3613 3508
3614 if (likely(objp)) 3509 if (likely(objp))
3615 kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep)); 3510 kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size);
3616 3511
3617 if (unlikely((flags & __GFP_ZERO) && objp)) 3512 if (unlikely((flags & __GFP_ZERO) && objp))
3618 memset(objp, 0, obj_size(cachep)); 3513 memset(objp, 0, cachep->object_size);
3619 3514
3620 return objp; 3515 return objp;
3621} 3516}
@@ -3731,7 +3626,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
3731 kmemleak_free_recursive(objp, cachep->flags); 3626 kmemleak_free_recursive(objp, cachep->flags);
3732 objp = cache_free_debugcheck(cachep, objp, caller); 3627 objp = cache_free_debugcheck(cachep, objp, caller);
3733 3628
3734 kmemcheck_slab_free(cachep, objp, obj_size(cachep)); 3629 kmemcheck_slab_free(cachep, objp, cachep->object_size);
3735 3630
3736 /* 3631 /*
3737 * Skip calling cache_free_alien() when the platform is not numa. 3632 * Skip calling cache_free_alien() when the platform is not numa.
@@ -3766,7 +3661,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3766 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0)); 3661 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3767 3662
3768 trace_kmem_cache_alloc(_RET_IP_, ret, 3663 trace_kmem_cache_alloc(_RET_IP_, ret,
3769 obj_size(cachep), cachep->buffer_size, flags); 3664 cachep->object_size, cachep->size, flags);
3770 3665
3771 return ret; 3666 return ret;
3772} 3667}
@@ -3794,7 +3689,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3794 __builtin_return_address(0)); 3689 __builtin_return_address(0));
3795 3690
3796 trace_kmem_cache_alloc_node(_RET_IP_, ret, 3691 trace_kmem_cache_alloc_node(_RET_IP_, ret,
3797 obj_size(cachep), cachep->buffer_size, 3692 cachep->object_size, cachep->size,
3798 flags, nodeid); 3693 flags, nodeid);
3799 3694
3800 return ret; 3695 return ret;
@@ -3876,7 +3771,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3876 ret = __cache_alloc(cachep, flags, caller); 3771 ret = __cache_alloc(cachep, flags, caller);
3877 3772
3878 trace_kmalloc((unsigned long) caller, ret, 3773 trace_kmalloc((unsigned long) caller, ret,
3879 size, cachep->buffer_size, flags); 3774 size, cachep->size, flags);
3880 3775
3881 return ret; 3776 return ret;
3882} 3777}
@@ -3916,9 +3811,9 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3916 unsigned long flags; 3811 unsigned long flags;
3917 3812
3918 local_irq_save(flags); 3813 local_irq_save(flags);
3919 debug_check_no_locks_freed(objp, obj_size(cachep)); 3814 debug_check_no_locks_freed(objp, cachep->object_size);
3920 if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) 3815 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3921 debug_check_no_obj_freed(objp, obj_size(cachep)); 3816 debug_check_no_obj_freed(objp, cachep->object_size);
3922 __cache_free(cachep, objp, __builtin_return_address(0)); 3817 __cache_free(cachep, objp, __builtin_return_address(0));
3923 local_irq_restore(flags); 3818 local_irq_restore(flags);
3924 3819
@@ -3947,8 +3842,9 @@ void kfree(const void *objp)
3947 local_irq_save(flags); 3842 local_irq_save(flags);
3948 kfree_debugcheck(objp); 3843 kfree_debugcheck(objp);
3949 c = virt_to_cache(objp); 3844 c = virt_to_cache(objp);
3950 debug_check_no_locks_freed(objp, obj_size(c)); 3845 debug_check_no_locks_freed(objp, c->object_size);
3951 debug_check_no_obj_freed(objp, obj_size(c)); 3846
3847 debug_check_no_obj_freed(objp, c->object_size);
3952 __cache_free(c, (void *)objp, __builtin_return_address(0)); 3848 __cache_free(c, (void *)objp, __builtin_return_address(0));
3953 local_irq_restore(flags); 3849 local_irq_restore(flags);
3954} 3850}
@@ -3956,7 +3852,7 @@ EXPORT_SYMBOL(kfree);
3956 3852
3957unsigned int kmem_cache_size(struct kmem_cache *cachep) 3853unsigned int kmem_cache_size(struct kmem_cache *cachep)
3958{ 3854{
3959 return obj_size(cachep); 3855 return cachep->object_size;
3960} 3856}
3961EXPORT_SYMBOL(kmem_cache_size); 3857EXPORT_SYMBOL(kmem_cache_size);
3962 3858
@@ -4030,7 +3926,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
4030 return 0; 3926 return 0;
4031 3927
4032fail: 3928fail:
4033 if (!cachep->next.next) { 3929 if (!cachep->list.next) {
4034 /* Cache is not active yet. Roll back what we did */ 3930 /* Cache is not active yet. Roll back what we did */
4035 node--; 3931 node--;
4036 while (node >= 0) { 3932 while (node >= 0) {
@@ -4065,7 +3961,7 @@ static void do_ccupdate_local(void *info)
4065 new->new[smp_processor_id()] = old; 3961 new->new[smp_processor_id()] = old;
4066} 3962}
4067 3963
4068/* Always called with the cache_chain_mutex held */ 3964/* Always called with the slab_mutex held */
4069static int do_tune_cpucache(struct kmem_cache *cachep, int limit, 3965static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
4070 int batchcount, int shared, gfp_t gfp) 3966 int batchcount, int shared, gfp_t gfp)
4071{ 3967{
@@ -4109,7 +4005,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
4109 return alloc_kmemlist(cachep, gfp); 4005 return alloc_kmemlist(cachep, gfp);
4110} 4006}
4111 4007
4112/* Called with cache_chain_mutex held always */ 4008/* Called with slab_mutex held always */
4113static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) 4009static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
4114{ 4010{
4115 int err; 4011 int err;
@@ -4124,13 +4020,13 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
4124 * The numbers are guessed, we should auto-tune as described by 4020 * The numbers are guessed, we should auto-tune as described by
4125 * Bonwick. 4021 * Bonwick.
4126 */ 4022 */
4127 if (cachep->buffer_size > 131072) 4023 if (cachep->size > 131072)
4128 limit = 1; 4024 limit = 1;
4129 else if (cachep->buffer_size > PAGE_SIZE) 4025 else if (cachep->size > PAGE_SIZE)
4130 limit = 8; 4026 limit = 8;
4131 else if (cachep->buffer_size > 1024) 4027 else if (cachep->size > 1024)
4132 limit = 24; 4028 limit = 24;
4133 else if (cachep->buffer_size > 256) 4029 else if (cachep->size > 256)
4134 limit = 54; 4030 limit = 54;
4135 else 4031 else
4136 limit = 120; 4032 limit = 120;
@@ -4145,7 +4041,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
4145 * to a larger limit. Thus disabled by default. 4041 * to a larger limit. Thus disabled by default.
4146 */ 4042 */
4147 shared = 0; 4043 shared = 0;
4148 if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1) 4044 if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
4149 shared = 8; 4045 shared = 8;
4150 4046
4151#if DEBUG 4047#if DEBUG
@@ -4211,11 +4107,11 @@ static void cache_reap(struct work_struct *w)
4211 int node = numa_mem_id(); 4107 int node = numa_mem_id();
4212 struct delayed_work *work = to_delayed_work(w); 4108 struct delayed_work *work = to_delayed_work(w);
4213 4109
4214 if (!mutex_trylock(&cache_chain_mutex)) 4110 if (!mutex_trylock(&slab_mutex))
4215 /* Give up. Setup the next iteration. */ 4111 /* Give up. Setup the next iteration. */
4216 goto out; 4112 goto out;
4217 4113
4218 list_for_each_entry(searchp, &cache_chain, next) { 4114 list_for_each_entry(searchp, &slab_caches, list) {
4219 check_irq_on(); 4115 check_irq_on();
4220 4116
4221 /* 4117 /*
@@ -4253,7 +4149,7 @@ next:
4253 cond_resched(); 4149 cond_resched();
4254 } 4150 }
4255 check_irq_on(); 4151 check_irq_on();
4256 mutex_unlock(&cache_chain_mutex); 4152 mutex_unlock(&slab_mutex);
4257 next_reap_node(); 4153 next_reap_node();
4258out: 4154out:
4259 /* Set up the next iteration */ 4155 /* Set up the next iteration */
@@ -4289,26 +4185,26 @@ static void *s_start(struct seq_file *m, loff_t *pos)
4289{ 4185{
4290 loff_t n = *pos; 4186 loff_t n = *pos;
4291 4187
4292 mutex_lock(&cache_chain_mutex); 4188 mutex_lock(&slab_mutex);
4293 if (!n) 4189 if (!n)
4294 print_slabinfo_header(m); 4190 print_slabinfo_header(m);
4295 4191
4296 return seq_list_start(&cache_chain, *pos); 4192 return seq_list_start(&slab_caches, *pos);
4297} 4193}
4298 4194
4299static void *s_next(struct seq_file *m, void *p, loff_t *pos) 4195static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4300{ 4196{
4301 return seq_list_next(p, &cache_chain, pos); 4197 return seq_list_next(p, &slab_caches, pos);
4302} 4198}
4303 4199
4304static void s_stop(struct seq_file *m, void *p) 4200static void s_stop(struct seq_file *m, void *p)
4305{ 4201{
4306 mutex_unlock(&cache_chain_mutex); 4202 mutex_unlock(&slab_mutex);
4307} 4203}
4308 4204
4309static int s_show(struct seq_file *m, void *p) 4205static int s_show(struct seq_file *m, void *p)
4310{ 4206{
4311 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next); 4207 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
4312 struct slab *slabp; 4208 struct slab *slabp;
4313 unsigned long active_objs; 4209 unsigned long active_objs;
4314 unsigned long num_objs; 4210 unsigned long num_objs;
@@ -4364,7 +4260,7 @@ static int s_show(struct seq_file *m, void *p)
4364 printk(KERN_ERR "slab: cache %s error: %s\n", name, error); 4260 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4365 4261
4366 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", 4262 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4367 name, active_objs, num_objs, cachep->buffer_size, 4263 name, active_objs, num_objs, cachep->size,
4368 cachep->num, (1 << cachep->gfporder)); 4264 cachep->num, (1 << cachep->gfporder));
4369 seq_printf(m, " : tunables %4u %4u %4u", 4265 seq_printf(m, " : tunables %4u %4u %4u",
4370 cachep->limit, cachep->batchcount, cachep->shared); 4266 cachep->limit, cachep->batchcount, cachep->shared);
@@ -4454,9 +4350,9 @@ static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
4454 return -EINVAL; 4350 return -EINVAL;
4455 4351
4456 /* Find the cache in the chain of caches. */ 4352 /* Find the cache in the chain of caches. */
4457 mutex_lock(&cache_chain_mutex); 4353 mutex_lock(&slab_mutex);
4458 res = -EINVAL; 4354 res = -EINVAL;
4459 list_for_each_entry(cachep, &cache_chain, next) { 4355 list_for_each_entry(cachep, &slab_caches, list) {
4460 if (!strcmp(cachep->name, kbuf)) { 4356 if (!strcmp(cachep->name, kbuf)) {
4461 if (limit < 1 || batchcount < 1 || 4357 if (limit < 1 || batchcount < 1 ||
4462 batchcount > limit || shared < 0) { 4358 batchcount > limit || shared < 0) {
@@ -4469,7 +4365,7 @@ static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
4469 break; 4365 break;
4470 } 4366 }
4471 } 4367 }
4472 mutex_unlock(&cache_chain_mutex); 4368 mutex_unlock(&slab_mutex);
4473 if (res >= 0) 4369 if (res >= 0)
4474 res = count; 4370 res = count;
4475 return res; 4371 return res;
@@ -4492,8 +4388,8 @@ static const struct file_operations proc_slabinfo_operations = {
4492 4388
4493static void *leaks_start(struct seq_file *m, loff_t *pos) 4389static void *leaks_start(struct seq_file *m, loff_t *pos)
4494{ 4390{
4495 mutex_lock(&cache_chain_mutex); 4391 mutex_lock(&slab_mutex);
4496 return seq_list_start(&cache_chain, *pos); 4392 return seq_list_start(&slab_caches, *pos);
4497} 4393}
4498 4394
4499static inline int add_caller(unsigned long *n, unsigned long v) 4395static inline int add_caller(unsigned long *n, unsigned long v)
@@ -4532,7 +4428,7 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4532 int i; 4428 int i;
4533 if (n[0] == n[1]) 4429 if (n[0] == n[1])
4534 return; 4430 return;
4535 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) { 4431 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) {
4536 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) 4432 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4537 continue; 4433 continue;
4538 if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) 4434 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
@@ -4558,7 +4454,7 @@ static void show_symbol(struct seq_file *m, unsigned long address)
4558 4454
4559static int leaks_show(struct seq_file *m, void *p) 4455static int leaks_show(struct seq_file *m, void *p)
4560{ 4456{
4561 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next); 4457 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
4562 struct slab *slabp; 4458 struct slab *slabp;
4563 struct kmem_list3 *l3; 4459 struct kmem_list3 *l3;
4564 const char *name; 4460 const char *name;
@@ -4592,17 +4488,17 @@ static int leaks_show(struct seq_file *m, void *p)
4592 name = cachep->name; 4488 name = cachep->name;
4593 if (n[0] == n[1]) { 4489 if (n[0] == n[1]) {
4594 /* Increase the buffer size */ 4490 /* Increase the buffer size */
4595 mutex_unlock(&cache_chain_mutex); 4491 mutex_unlock(&slab_mutex);
4596 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL); 4492 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
4597 if (!m->private) { 4493 if (!m->private) {
4598 /* Too bad, we are really out */ 4494 /* Too bad, we are really out */
4599 m->private = n; 4495 m->private = n;
4600 mutex_lock(&cache_chain_mutex); 4496 mutex_lock(&slab_mutex);
4601 return -ENOMEM; 4497 return -ENOMEM;
4602 } 4498 }
4603 *(unsigned long *)m->private = n[0] * 2; 4499 *(unsigned long *)m->private = n[0] * 2;
4604 kfree(n); 4500 kfree(n);
4605 mutex_lock(&cache_chain_mutex); 4501 mutex_lock(&slab_mutex);
4606 /* Now make sure this entry will be retried */ 4502 /* Now make sure this entry will be retried */
4607 m->count = m->size; 4503 m->count = m->size;
4608 return 0; 4504 return 0;
@@ -4677,6 +4573,6 @@ size_t ksize(const void *objp)
4677 if (unlikely(objp == ZERO_SIZE_PTR)) 4573 if (unlikely(objp == ZERO_SIZE_PTR))
4678 return 0; 4574 return 0;
4679 4575
4680 return obj_size(virt_to_cache(objp)); 4576 return virt_to_cache(objp)->object_size;
4681} 4577}
4682EXPORT_SYMBOL(ksize); 4578EXPORT_SYMBOL(ksize);
diff --git a/mm/slab.h b/mm/slab.h
new file mode 100644
index 000000000000..db7848caaa25
--- /dev/null
+++ b/mm/slab.h
@@ -0,0 +1,33 @@
1#ifndef MM_SLAB_H
2#define MM_SLAB_H
3/*
4 * Internal slab definitions
5 */
6
7/*
8 * State of the slab allocator.
9 *
10 * This is used to describe the states of the allocator during bootup.
11 * Allocators use this to gradually bootstrap themselves. Most allocators
12 * have the problem that the structures used for managing slab caches are
13 * allocated from slab caches themselves.
14 */
15enum slab_state {
16 DOWN, /* No slab functionality yet */
17 PARTIAL, /* SLUB: kmem_cache_node available */
18 PARTIAL_ARRAYCACHE, /* SLAB: kmalloc size for arraycache available */
19 PARTIAL_L3, /* SLAB: kmalloc size for l3 struct available */
20 UP, /* Slab caches usable but not all extras yet */
21 FULL /* Everything is working */
22};
23
24extern enum slab_state slab_state;
25
26/* The slab cache mutex protects the management structures during changes */
27extern struct mutex slab_mutex;
28extern struct list_head slab_caches;
29
30struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
31 size_t align, unsigned long flags, void (*ctor)(void *));
32
33#endif
diff --git a/mm/slab_common.c b/mm/slab_common.c
new file mode 100644
index 000000000000..aa3ca5bb01b5
--- /dev/null
+++ b/mm/slab_common.c
@@ -0,0 +1,120 @@
1/*
2 * Slab allocator functions that are independent of the allocator strategy
3 *
4 * (C) 2012 Christoph Lameter <cl@linux.com>
5 */
6#include <linux/slab.h>
7
8#include <linux/mm.h>
9#include <linux/poison.h>
10#include <linux/interrupt.h>
11#include <linux/memory.h>
12#include <linux/compiler.h>
13#include <linux/module.h>
14#include <linux/cpu.h>
15#include <linux/uaccess.h>
16#include <asm/cacheflush.h>
17#include <asm/tlbflush.h>
18#include <asm/page.h>
19
20#include "slab.h"
21
22enum slab_state slab_state;
23LIST_HEAD(slab_caches);
24DEFINE_MUTEX(slab_mutex);
25
26/*
27 * kmem_cache_create - Create a cache.
28 * @name: A string which is used in /proc/slabinfo to identify this cache.
29 * @size: The size of objects to be created in this cache.
30 * @align: The required alignment for the objects.
31 * @flags: SLAB flags
32 * @ctor: A constructor for the objects.
33 *
34 * Returns a ptr to the cache on success, NULL on failure.
35 * Cannot be called within a interrupt, but can be interrupted.
36 * The @ctor is run when new pages are allocated by the cache.
37 *
38 * The flags are
39 *
40 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
41 * to catch references to uninitialised memory.
42 *
43 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
44 * for buffer overruns.
45 *
46 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
47 * cacheline. This can be beneficial if you're counting cycles as closely
48 * as davem.
49 */
50
51struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align,
52 unsigned long flags, void (*ctor)(void *))
53{
54 struct kmem_cache *s = NULL;
55
56#ifdef CONFIG_DEBUG_VM
57 if (!name || in_interrupt() || size < sizeof(void *) ||
58 size > KMALLOC_MAX_SIZE) {
59 printk(KERN_ERR "kmem_cache_create(%s) integrity check"
60 " failed\n", name);
61 goto out;
62 }
63#endif
64
65 get_online_cpus();
66 mutex_lock(&slab_mutex);
67
68#ifdef CONFIG_DEBUG_VM
69 list_for_each_entry(s, &slab_caches, list) {
70 char tmp;
71 int res;
72
73 /*
74 * This happens when the module gets unloaded and doesn't
75 * destroy its slab cache and no-one else reuses the vmalloc
76 * area of the module. Print a warning.
77 */
78 res = probe_kernel_address(s->name, tmp);
79 if (res) {
80 printk(KERN_ERR
81 "Slab cache with size %d has lost its name\n",
82 s->object_size);
83 continue;
84 }
85
86 if (!strcmp(s->name, name)) {
87 printk(KERN_ERR "kmem_cache_create(%s): Cache name"
88 " already exists.\n",
89 name);
90 dump_stack();
91 s = NULL;
92 goto oops;
93 }
94 }
95
96 WARN_ON(strchr(name, ' ')); /* It confuses parsers */
97#endif
98
99 s = __kmem_cache_create(name, size, align, flags, ctor);
100
101#ifdef CONFIG_DEBUG_VM
102oops:
103#endif
104 mutex_unlock(&slab_mutex);
105 put_online_cpus();
106
107#ifdef CONFIG_DEBUG_VM
108out:
109#endif
110 if (!s && (flags & SLAB_PANIC))
111 panic("kmem_cache_create: Failed to create slab '%s'\n", name);
112
113 return s;
114}
115EXPORT_SYMBOL(kmem_cache_create);
116
117int slab_is_available(void)
118{
119 return slab_state >= UP;
120}
diff --git a/mm/slob.c b/mm/slob.c
index 8105be42cad1..45d4ca79933a 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -59,6 +59,8 @@
59 59
60#include <linux/kernel.h> 60#include <linux/kernel.h>
61#include <linux/slab.h> 61#include <linux/slab.h>
62#include "slab.h"
63
62#include <linux/mm.h> 64#include <linux/mm.h>
63#include <linux/swap.h> /* struct reclaim_state */ 65#include <linux/swap.h> /* struct reclaim_state */
64#include <linux/cache.h> 66#include <linux/cache.h>
@@ -92,36 +94,6 @@ struct slob_block {
92typedef struct slob_block slob_t; 94typedef struct slob_block slob_t;
93 95
94/* 96/*
95 * We use struct page fields to manage some slob allocation aspects,
96 * however to avoid the horrible mess in include/linux/mm_types.h, we'll
97 * just define our own struct page type variant here.
98 */
99struct slob_page {
100 union {
101 struct {
102 unsigned long flags; /* mandatory */
103 atomic_t _count; /* mandatory */
104 slobidx_t units; /* free units left in page */
105 unsigned long pad[2];
106 slob_t *free; /* first free slob_t in page */
107 struct list_head list; /* linked list of free pages */
108 };
109 struct page page;
110 };
111};
112static inline void struct_slob_page_wrong_size(void)
113{ BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }
114
115/*
116 * free_slob_page: call before a slob_page is returned to the page allocator.
117 */
118static inline void free_slob_page(struct slob_page *sp)
119{
120 reset_page_mapcount(&sp->page);
121 sp->page.mapping = NULL;
122}
123
124/*
125 * All partially free slob pages go on these lists. 97 * All partially free slob pages go on these lists.
126 */ 98 */
127#define SLOB_BREAK1 256 99#define SLOB_BREAK1 256
@@ -131,46 +103,23 @@ static LIST_HEAD(free_slob_medium);
131static LIST_HEAD(free_slob_large); 103static LIST_HEAD(free_slob_large);
132 104
133/* 105/*
134 * is_slob_page: True for all slob pages (false for bigblock pages)
135 */
136static inline int is_slob_page(struct slob_page *sp)
137{
138 return PageSlab((struct page *)sp);
139}
140
141static inline void set_slob_page(struct slob_page *sp)
142{
143 __SetPageSlab((struct page *)sp);
144}
145
146static inline void clear_slob_page(struct slob_page *sp)
147{
148 __ClearPageSlab((struct page *)sp);
149}
150
151static inline struct slob_page *slob_page(const void *addr)
152{
153 return (struct slob_page *)virt_to_page(addr);
154}
155
156/*
157 * slob_page_free: true for pages on free_slob_pages list. 106 * slob_page_free: true for pages on free_slob_pages list.
158 */ 107 */
159static inline int slob_page_free(struct slob_page *sp) 108static inline int slob_page_free(struct page *sp)
160{ 109{
161 return PageSlobFree((struct page *)sp); 110 return PageSlobFree(sp);
162} 111}
163 112
164static void set_slob_page_free(struct slob_page *sp, struct list_head *list) 113static void set_slob_page_free(struct page *sp, struct list_head *list)
165{ 114{
166 list_add(&sp->list, list); 115 list_add(&sp->list, list);
167 __SetPageSlobFree((struct page *)sp); 116 __SetPageSlobFree(sp);
168} 117}
169 118
170static inline void clear_slob_page_free(struct slob_page *sp) 119static inline void clear_slob_page_free(struct page *sp)
171{ 120{
172 list_del(&sp->list); 121 list_del(&sp->list);
173 __ClearPageSlobFree((struct page *)sp); 122 __ClearPageSlobFree(sp);
174} 123}
175 124
176#define SLOB_UNIT sizeof(slob_t) 125#define SLOB_UNIT sizeof(slob_t)
@@ -267,12 +216,12 @@ static void slob_free_pages(void *b, int order)
267/* 216/*
268 * Allocate a slob block within a given slob_page sp. 217 * Allocate a slob block within a given slob_page sp.
269 */ 218 */
270static void *slob_page_alloc(struct slob_page *sp, size_t size, int align) 219static void *slob_page_alloc(struct page *sp, size_t size, int align)
271{ 220{
272 slob_t *prev, *cur, *aligned = NULL; 221 slob_t *prev, *cur, *aligned = NULL;
273 int delta = 0, units = SLOB_UNITS(size); 222 int delta = 0, units = SLOB_UNITS(size);
274 223
275 for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) { 224 for (prev = NULL, cur = sp->freelist; ; prev = cur, cur = slob_next(cur)) {
276 slobidx_t avail = slob_units(cur); 225 slobidx_t avail = slob_units(cur);
277 226
278 if (align) { 227 if (align) {
@@ -296,12 +245,12 @@ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
296 if (prev) 245 if (prev)
297 set_slob(prev, slob_units(prev), next); 246 set_slob(prev, slob_units(prev), next);
298 else 247 else
299 sp->free = next; 248 sp->freelist = next;
300 } else { /* fragment */ 249 } else { /* fragment */
301 if (prev) 250 if (prev)
302 set_slob(prev, slob_units(prev), cur + units); 251 set_slob(prev, slob_units(prev), cur + units);
303 else 252 else
304 sp->free = cur + units; 253 sp->freelist = cur + units;
305 set_slob(cur + units, avail - units, next); 254 set_slob(cur + units, avail - units, next);
306 } 255 }
307 256
@@ -320,7 +269,7 @@ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
320 */ 269 */
321static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) 270static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
322{ 271{
323 struct slob_page *sp; 272 struct page *sp;
324 struct list_head *prev; 273 struct list_head *prev;
325 struct list_head *slob_list; 274 struct list_head *slob_list;
326 slob_t *b = NULL; 275 slob_t *b = NULL;
@@ -341,7 +290,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
341 * If there's a node specification, search for a partial 290 * If there's a node specification, search for a partial
342 * page with a matching node id in the freelist. 291 * page with a matching node id in the freelist.
343 */ 292 */
344 if (node != -1 && page_to_nid(&sp->page) != node) 293 if (node != -1 && page_to_nid(sp) != node)
345 continue; 294 continue;
346#endif 295#endif
347 /* Enough room on this page? */ 296 /* Enough room on this page? */
@@ -369,12 +318,12 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
369 b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node); 318 b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
370 if (!b) 319 if (!b)
371 return NULL; 320 return NULL;
372 sp = slob_page(b); 321 sp = virt_to_page(b);
373 set_slob_page(sp); 322 __SetPageSlab(sp);
374 323
375 spin_lock_irqsave(&slob_lock, flags); 324 spin_lock_irqsave(&slob_lock, flags);
376 sp->units = SLOB_UNITS(PAGE_SIZE); 325 sp->units = SLOB_UNITS(PAGE_SIZE);
377 sp->free = b; 326 sp->freelist = b;
378 INIT_LIST_HEAD(&sp->list); 327 INIT_LIST_HEAD(&sp->list);
379 set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE)); 328 set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
380 set_slob_page_free(sp, slob_list); 329 set_slob_page_free(sp, slob_list);
@@ -392,7 +341,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
392 */ 341 */
393static void slob_free(void *block, int size) 342static void slob_free(void *block, int size)
394{ 343{
395 struct slob_page *sp; 344 struct page *sp;
396 slob_t *prev, *next, *b = (slob_t *)block; 345 slob_t *prev, *next, *b = (slob_t *)block;
397 slobidx_t units; 346 slobidx_t units;
398 unsigned long flags; 347 unsigned long flags;
@@ -402,7 +351,7 @@ static void slob_free(void *block, int size)
402 return; 351 return;
403 BUG_ON(!size); 352 BUG_ON(!size);
404 353
405 sp = slob_page(block); 354 sp = virt_to_page(block);
406 units = SLOB_UNITS(size); 355 units = SLOB_UNITS(size);
407 356
408 spin_lock_irqsave(&slob_lock, flags); 357 spin_lock_irqsave(&slob_lock, flags);
@@ -412,8 +361,8 @@ static void slob_free(void *block, int size)
412 if (slob_page_free(sp)) 361 if (slob_page_free(sp))
413 clear_slob_page_free(sp); 362 clear_slob_page_free(sp);
414 spin_unlock_irqrestore(&slob_lock, flags); 363 spin_unlock_irqrestore(&slob_lock, flags);
415 clear_slob_page(sp); 364 __ClearPageSlab(sp);
416 free_slob_page(sp); 365 reset_page_mapcount(sp);
417 slob_free_pages(b, 0); 366 slob_free_pages(b, 0);
418 return; 367 return;
419 } 368 }
@@ -421,7 +370,7 @@ static void slob_free(void *block, int size)
421 if (!slob_page_free(sp)) { 370 if (!slob_page_free(sp)) {
422 /* This slob page is about to become partially free. Easy! */ 371 /* This slob page is about to become partially free. Easy! */
423 sp->units = units; 372 sp->units = units;
424 sp->free = b; 373 sp->freelist = b;
425 set_slob(b, units, 374 set_slob(b, units,
426 (void *)((unsigned long)(b + 375 (void *)((unsigned long)(b +
427 SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK)); 376 SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
@@ -441,15 +390,15 @@ static void slob_free(void *block, int size)
441 */ 390 */
442 sp->units += units; 391 sp->units += units;
443 392
444 if (b < sp->free) { 393 if (b < (slob_t *)sp->freelist) {
445 if (b + units == sp->free) { 394 if (b + units == sp->freelist) {
446 units += slob_units(sp->free); 395 units += slob_units(sp->freelist);
447 sp->free = slob_next(sp->free); 396 sp->freelist = slob_next(sp->freelist);
448 } 397 }
449 set_slob(b, units, sp->free); 398 set_slob(b, units, sp->freelist);
450 sp->free = b; 399 sp->freelist = b;
451 } else { 400 } else {
452 prev = sp->free; 401 prev = sp->freelist;
453 next = slob_next(prev); 402 next = slob_next(prev);
454 while (b > next) { 403 while (b > next) {
455 prev = next; 404 prev = next;
@@ -522,7 +471,7 @@ EXPORT_SYMBOL(__kmalloc_node);
522 471
523void kfree(const void *block) 472void kfree(const void *block)
524{ 473{
525 struct slob_page *sp; 474 struct page *sp;
526 475
527 trace_kfree(_RET_IP_, block); 476 trace_kfree(_RET_IP_, block);
528 477
@@ -530,43 +479,36 @@ void kfree(const void *block)
530 return; 479 return;
531 kmemleak_free(block); 480 kmemleak_free(block);
532 481
533 sp = slob_page(block); 482 sp = virt_to_page(block);
534 if (is_slob_page(sp)) { 483 if (PageSlab(sp)) {
535 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); 484 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
536 unsigned int *m = (unsigned int *)(block - align); 485 unsigned int *m = (unsigned int *)(block - align);
537 slob_free(m, *m + align); 486 slob_free(m, *m + align);
538 } else 487 } else
539 put_page(&sp->page); 488 put_page(sp);
540} 489}
541EXPORT_SYMBOL(kfree); 490EXPORT_SYMBOL(kfree);
542 491
543/* can't use ksize for kmem_cache_alloc memory, only kmalloc */ 492/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
544size_t ksize(const void *block) 493size_t ksize(const void *block)
545{ 494{
546 struct slob_page *sp; 495 struct page *sp;
547 496
548 BUG_ON(!block); 497 BUG_ON(!block);
549 if (unlikely(block == ZERO_SIZE_PTR)) 498 if (unlikely(block == ZERO_SIZE_PTR))
550 return 0; 499 return 0;
551 500
552 sp = slob_page(block); 501 sp = virt_to_page(block);
553 if (is_slob_page(sp)) { 502 if (PageSlab(sp)) {
554 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); 503 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
555 unsigned int *m = (unsigned int *)(block - align); 504 unsigned int *m = (unsigned int *)(block - align);
556 return SLOB_UNITS(*m) * SLOB_UNIT; 505 return SLOB_UNITS(*m) * SLOB_UNIT;
557 } else 506 } else
558 return sp->page.private; 507 return sp->private;
559} 508}
560EXPORT_SYMBOL(ksize); 509EXPORT_SYMBOL(ksize);
561 510
562struct kmem_cache { 511struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
563 unsigned int size, align;
564 unsigned long flags;
565 const char *name;
566 void (*ctor)(void *);
567};
568
569struct kmem_cache *kmem_cache_create(const char *name, size_t size,
570 size_t align, unsigned long flags, void (*ctor)(void *)) 512 size_t align, unsigned long flags, void (*ctor)(void *))
571{ 513{
572 struct kmem_cache *c; 514 struct kmem_cache *c;
@@ -589,13 +531,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
589 c->align = ARCH_SLAB_MINALIGN; 531 c->align = ARCH_SLAB_MINALIGN;
590 if (c->align < align) 532 if (c->align < align)
591 c->align = align; 533 c->align = align;
592 } else if (flags & SLAB_PANIC)
593 panic("Cannot create slab cache %s\n", name);
594 534
595 kmemleak_alloc(c, sizeof(struct kmem_cache), 1, GFP_KERNEL); 535 kmemleak_alloc(c, sizeof(struct kmem_cache), 1, GFP_KERNEL);
536 c->refcount = 1;
537 }
596 return c; 538 return c;
597} 539}
598EXPORT_SYMBOL(kmem_cache_create);
599 540
600void kmem_cache_destroy(struct kmem_cache *c) 541void kmem_cache_destroy(struct kmem_cache *c)
601{ 542{
@@ -678,19 +619,12 @@ int kmem_cache_shrink(struct kmem_cache *d)
678} 619}
679EXPORT_SYMBOL(kmem_cache_shrink); 620EXPORT_SYMBOL(kmem_cache_shrink);
680 621
681static unsigned int slob_ready __read_mostly;
682
683int slab_is_available(void)
684{
685 return slob_ready;
686}
687
688void __init kmem_cache_init(void) 622void __init kmem_cache_init(void)
689{ 623{
690 slob_ready = 1; 624 slab_state = UP;
691} 625}
692 626
693void __init kmem_cache_init_late(void) 627void __init kmem_cache_init_late(void)
694{ 628{
695 /* Nothing to do */ 629 slab_state = FULL;
696} 630}
diff --git a/mm/slub.c b/mm/slub.c
index 8c691fa1cf3c..e517d435e5dc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,6 +16,7 @@
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include "slab.h"
19#include <linux/proc_fs.h> 20#include <linux/proc_fs.h>
20#include <linux/seq_file.h> 21#include <linux/seq_file.h>
21#include <linux/kmemcheck.h> 22#include <linux/kmemcheck.h>
@@ -35,13 +36,13 @@
35 36
36/* 37/*
37 * Lock order: 38 * Lock order:
38 * 1. slub_lock (Global Semaphore) 39 * 1. slab_mutex (Global Mutex)
39 * 2. node->list_lock 40 * 2. node->list_lock
40 * 3. slab_lock(page) (Only on some arches and for debugging) 41 * 3. slab_lock(page) (Only on some arches and for debugging)
41 * 42 *
42 * slub_lock 43 * slab_mutex
43 * 44 *
44 * The role of the slub_lock is to protect the list of all the slabs 45 * The role of the slab_mutex is to protect the list of all the slabs
45 * and to synchronize major metadata changes to slab cache structures. 46 * and to synchronize major metadata changes to slab cache structures.
46 * 47 *
47 * The slab_lock is only used for debugging and on arches that do not 48 * The slab_lock is only used for debugging and on arches that do not
@@ -182,17 +183,6 @@ static int kmem_size = sizeof(struct kmem_cache);
182static struct notifier_block slab_notifier; 183static struct notifier_block slab_notifier;
183#endif 184#endif
184 185
185static enum {
186 DOWN, /* No slab functionality available */
187 PARTIAL, /* Kmem_cache_node works */
188 UP, /* Everything works but does not show up in sysfs */
189 SYSFS /* Sysfs up */
190} slab_state = DOWN;
191
192/* A list of all slab caches on the system */
193static DECLARE_RWSEM(slub_lock);
194static LIST_HEAD(slab_caches);
195
196/* 186/*
197 * Tracking user of a slab. 187 * Tracking user of a slab.
198 */ 188 */
@@ -237,11 +227,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
237 * Core slab cache functions 227 * Core slab cache functions
238 *******************************************************************/ 228 *******************************************************************/
239 229
240int slab_is_available(void)
241{
242 return slab_state >= UP;
243}
244
245static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) 230static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
246{ 231{
247 return s->node[node]; 232 return s->node[node];
@@ -311,7 +296,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
311 * and whatever may come after it. 296 * and whatever may come after it.
312 */ 297 */
313 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) 298 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
314 return s->objsize; 299 return s->object_size;
315 300
316#endif 301#endif
317 /* 302 /*
@@ -609,11 +594,11 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
609 if (p > addr + 16) 594 if (p > addr + 16)
610 print_section("Bytes b4 ", p - 16, 16); 595 print_section("Bytes b4 ", p - 16, 16);
611 596
612 print_section("Object ", p, min_t(unsigned long, s->objsize, 597 print_section("Object ", p, min_t(unsigned long, s->object_size,
613 PAGE_SIZE)); 598 PAGE_SIZE));
614 if (s->flags & SLAB_RED_ZONE) 599 if (s->flags & SLAB_RED_ZONE)
615 print_section("Redzone ", p + s->objsize, 600 print_section("Redzone ", p + s->object_size,
616 s->inuse - s->objsize); 601 s->inuse - s->object_size);
617 602
618 if (s->offset) 603 if (s->offset)
619 off = s->offset + sizeof(void *); 604 off = s->offset + sizeof(void *);
@@ -655,12 +640,12 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
655 u8 *p = object; 640 u8 *p = object;
656 641
657 if (s->flags & __OBJECT_POISON) { 642 if (s->flags & __OBJECT_POISON) {
658 memset(p, POISON_FREE, s->objsize - 1); 643 memset(p, POISON_FREE, s->object_size - 1);
659 p[s->objsize - 1] = POISON_END; 644 p[s->object_size - 1] = POISON_END;
660 } 645 }
661 646
662 if (s->flags & SLAB_RED_ZONE) 647 if (s->flags & SLAB_RED_ZONE)
663 memset(p + s->objsize, val, s->inuse - s->objsize); 648 memset(p + s->object_size, val, s->inuse - s->object_size);
664} 649}
665 650
666static void restore_bytes(struct kmem_cache *s, char *message, u8 data, 651static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
@@ -705,10 +690,10 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
705 * Poisoning uses 0x6b (POISON_FREE) and the last byte is 690 * Poisoning uses 0x6b (POISON_FREE) and the last byte is
706 * 0xa5 (POISON_END) 691 * 0xa5 (POISON_END)
707 * 692 *
708 * object + s->objsize 693 * object + s->object_size
709 * Padding to reach word boundary. This is also used for Redzoning. 694 * Padding to reach word boundary. This is also used for Redzoning.
710 * Padding is extended by another word if Redzoning is enabled and 695 * Padding is extended by another word if Redzoning is enabled and
711 * objsize == inuse. 696 * object_size == inuse.
712 * 697 *
713 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with 698 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with
714 * 0xcc (RED_ACTIVE) for objects in use. 699 * 0xcc (RED_ACTIVE) for objects in use.
@@ -727,7 +712,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
727 * object + s->size 712 * object + s->size
728 * Nothing is used beyond s->size. 713 * Nothing is used beyond s->size.
729 * 714 *
730 * If slabcaches are merged then the objsize and inuse boundaries are mostly 715 * If slabcaches are merged then the object_size and inuse boundaries are mostly
731 * ignored. And therefore no slab options that rely on these boundaries 716 * ignored. And therefore no slab options that rely on these boundaries
732 * may be used with merged slabcaches. 717 * may be used with merged slabcaches.
733 */ 718 */
@@ -787,25 +772,25 @@ static int check_object(struct kmem_cache *s, struct page *page,
787 void *object, u8 val) 772 void *object, u8 val)
788{ 773{
789 u8 *p = object; 774 u8 *p = object;
790 u8 *endobject = object + s->objsize; 775 u8 *endobject = object + s->object_size;
791 776
792 if (s->flags & SLAB_RED_ZONE) { 777 if (s->flags & SLAB_RED_ZONE) {
793 if (!check_bytes_and_report(s, page, object, "Redzone", 778 if (!check_bytes_and_report(s, page, object, "Redzone",
794 endobject, val, s->inuse - s->objsize)) 779 endobject, val, s->inuse - s->object_size))
795 return 0; 780 return 0;
796 } else { 781 } else {
797 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { 782 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
798 check_bytes_and_report(s, page, p, "Alignment padding", 783 check_bytes_and_report(s, page, p, "Alignment padding",
799 endobject, POISON_INUSE, s->inuse - s->objsize); 784 endobject, POISON_INUSE, s->inuse - s->object_size);
800 } 785 }
801 } 786 }
802 787
803 if (s->flags & SLAB_POISON) { 788 if (s->flags & SLAB_POISON) {
804 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && 789 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
805 (!check_bytes_and_report(s, page, p, "Poison", p, 790 (!check_bytes_and_report(s, page, p, "Poison", p,
806 POISON_FREE, s->objsize - 1) || 791 POISON_FREE, s->object_size - 1) ||
807 !check_bytes_and_report(s, page, p, "Poison", 792 !check_bytes_and_report(s, page, p, "Poison",
808 p + s->objsize - 1, POISON_END, 1))) 793 p + s->object_size - 1, POISON_END, 1)))
809 return 0; 794 return 0;
810 /* 795 /*
811 * check_pad_bytes cleans up on its own. 796 * check_pad_bytes cleans up on its own.
@@ -926,7 +911,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
926 page->freelist); 911 page->freelist);
927 912
928 if (!alloc) 913 if (!alloc)
929 print_section("Object ", (void *)object, s->objsize); 914 print_section("Object ", (void *)object, s->object_size);
930 915
931 dump_stack(); 916 dump_stack();
932 } 917 }
@@ -942,14 +927,14 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
942 lockdep_trace_alloc(flags); 927 lockdep_trace_alloc(flags);
943 might_sleep_if(flags & __GFP_WAIT); 928 might_sleep_if(flags & __GFP_WAIT);
944 929
945 return should_failslab(s->objsize, flags, s->flags); 930 return should_failslab(s->object_size, flags, s->flags);
946} 931}
947 932
948static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) 933static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
949{ 934{
950 flags &= gfp_allowed_mask; 935 flags &= gfp_allowed_mask;
951 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); 936 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
952 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); 937 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
953} 938}
954 939
955static inline void slab_free_hook(struct kmem_cache *s, void *x) 940static inline void slab_free_hook(struct kmem_cache *s, void *x)
@@ -966,13 +951,13 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
966 unsigned long flags; 951 unsigned long flags;
967 952
968 local_irq_save(flags); 953 local_irq_save(flags);
969 kmemcheck_slab_free(s, x, s->objsize); 954 kmemcheck_slab_free(s, x, s->object_size);
970 debug_check_no_locks_freed(x, s->objsize); 955 debug_check_no_locks_freed(x, s->object_size);
971 local_irq_restore(flags); 956 local_irq_restore(flags);
972 } 957 }
973#endif 958#endif
974 if (!(s->flags & SLAB_DEBUG_OBJECTS)) 959 if (!(s->flags & SLAB_DEBUG_OBJECTS))
975 debug_check_no_obj_freed(x, s->objsize); 960 debug_check_no_obj_freed(x, s->object_size);
976} 961}
977 962
978/* 963/*
@@ -1207,7 +1192,7 @@ out:
1207 1192
1208__setup("slub_debug", setup_slub_debug); 1193__setup("slub_debug", setup_slub_debug);
1209 1194
1210static unsigned long kmem_cache_flags(unsigned long objsize, 1195static unsigned long kmem_cache_flags(unsigned long object_size,
1211 unsigned long flags, const char *name, 1196 unsigned long flags, const char *name,
1212 void (*ctor)(void *)) 1197 void (*ctor)(void *))
1213{ 1198{
@@ -1237,7 +1222,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page,
1237static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, 1222static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1238 struct page *page) {} 1223 struct page *page) {}
1239static inline void remove_full(struct kmem_cache *s, struct page *page) {} 1224static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1240static inline unsigned long kmem_cache_flags(unsigned long objsize, 1225static inline unsigned long kmem_cache_flags(unsigned long object_size,
1241 unsigned long flags, const char *name, 1226 unsigned long flags, const char *name,
1242 void (*ctor)(void *)) 1227 void (*ctor)(void *))
1243{ 1228{
@@ -1314,13 +1299,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1314 stat(s, ORDER_FALLBACK); 1299 stat(s, ORDER_FALLBACK);
1315 } 1300 }
1316 1301
1317 if (flags & __GFP_WAIT) 1302 if (kmemcheck_enabled && page
1318 local_irq_disable();
1319
1320 if (!page)
1321 return NULL;
1322
1323 if (kmemcheck_enabled
1324 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { 1303 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1325 int pages = 1 << oo_order(oo); 1304 int pages = 1 << oo_order(oo);
1326 1305
@@ -1336,6 +1315,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1336 kmemcheck_mark_unallocated_pages(page, pages); 1315 kmemcheck_mark_unallocated_pages(page, pages);
1337 } 1316 }
1338 1317
1318 if (flags & __GFP_WAIT)
1319 local_irq_disable();
1320 if (!page)
1321 return NULL;
1322
1339 page->objects = oo_objects(oo); 1323 page->objects = oo_objects(oo);
1340 mod_zone_page_state(page_zone(page), 1324 mod_zone_page_state(page_zone(page),
1341 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1325 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
@@ -1490,12 +1474,12 @@ static inline void remove_partial(struct kmem_cache_node *n,
1490} 1474}
1491 1475
1492/* 1476/*
1493 * Lock slab, remove from the partial list and put the object into the 1477 * Remove slab from the partial list, freeze it and
1494 * per cpu freelist. 1478 * return the pointer to the freelist.
1495 * 1479 *
1496 * Returns a list of objects or NULL if it fails. 1480 * Returns a list of objects or NULL if it fails.
1497 * 1481 *
1498 * Must hold list_lock. 1482 * Must hold list_lock since we modify the partial list.
1499 */ 1483 */
1500static inline void *acquire_slab(struct kmem_cache *s, 1484static inline void *acquire_slab(struct kmem_cache *s,
1501 struct kmem_cache_node *n, struct page *page, 1485 struct kmem_cache_node *n, struct page *page,
@@ -1510,26 +1494,27 @@ static inline void *acquire_slab(struct kmem_cache *s,
1510 * The old freelist is the list of objects for the 1494 * The old freelist is the list of objects for the
1511 * per cpu allocation list. 1495 * per cpu allocation list.
1512 */ 1496 */
1513 do { 1497 freelist = page->freelist;
1514 freelist = page->freelist; 1498 counters = page->counters;
1515 counters = page->counters; 1499 new.counters = counters;
1516 new.counters = counters; 1500 if (mode) {
1517 if (mode) { 1501 new.inuse = page->objects;
1518 new.inuse = page->objects; 1502 new.freelist = NULL;
1519 new.freelist = NULL; 1503 } else {
1520 } else { 1504 new.freelist = freelist;
1521 new.freelist = freelist; 1505 }
1522 }
1523 1506
1524 VM_BUG_ON(new.frozen); 1507 VM_BUG_ON(new.frozen);
1525 new.frozen = 1; 1508 new.frozen = 1;
1526 1509
1527 } while (!__cmpxchg_double_slab(s, page, 1510 if (!__cmpxchg_double_slab(s, page,
1528 freelist, counters, 1511 freelist, counters,
1529 new.freelist, new.counters, 1512 new.freelist, new.counters,
1530 "lock and freeze")); 1513 "acquire_slab"))
1514 return NULL;
1531 1515
1532 remove_partial(n, page); 1516 remove_partial(n, page);
1517 WARN_ON(!freelist);
1533 return freelist; 1518 return freelist;
1534} 1519}
1535 1520
@@ -1563,7 +1548,6 @@ static void *get_partial_node(struct kmem_cache *s,
1563 1548
1564 if (!object) { 1549 if (!object) {
1565 c->page = page; 1550 c->page = page;
1566 c->node = page_to_nid(page);
1567 stat(s, ALLOC_FROM_PARTIAL); 1551 stat(s, ALLOC_FROM_PARTIAL);
1568 object = t; 1552 object = t;
1569 available = page->objects - page->inuse; 1553 available = page->objects - page->inuse;
@@ -1617,7 +1601,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1617 1601
1618 do { 1602 do {
1619 cpuset_mems_cookie = get_mems_allowed(); 1603 cpuset_mems_cookie = get_mems_allowed();
1620 zonelist = node_zonelist(slab_node(current->mempolicy), flags); 1604 zonelist = node_zonelist(slab_node(), flags);
1621 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 1605 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1622 struct kmem_cache_node *n; 1606 struct kmem_cache_node *n;
1623 1607
@@ -1731,14 +1715,12 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
1731/* 1715/*
1732 * Remove the cpu slab 1716 * Remove the cpu slab
1733 */ 1717 */
1734static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1718static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist)
1735{ 1719{
1736 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; 1720 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1737 struct page *page = c->page;
1738 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1721 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1739 int lock = 0; 1722 int lock = 0;
1740 enum slab_modes l = M_NONE, m = M_NONE; 1723 enum slab_modes l = M_NONE, m = M_NONE;
1741 void *freelist;
1742 void *nextfree; 1724 void *nextfree;
1743 int tail = DEACTIVATE_TO_HEAD; 1725 int tail = DEACTIVATE_TO_HEAD;
1744 struct page new; 1726 struct page new;
@@ -1749,11 +1731,6 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1749 tail = DEACTIVATE_TO_TAIL; 1731 tail = DEACTIVATE_TO_TAIL;
1750 } 1732 }
1751 1733
1752 c->tid = next_tid(c->tid);
1753 c->page = NULL;
1754 freelist = c->freelist;
1755 c->freelist = NULL;
1756
1757 /* 1734 /*
1758 * Stage one: Free all available per cpu objects back 1735 * Stage one: Free all available per cpu objects back
1759 * to the page freelist while it is still frozen. Leave the 1736 * to the page freelist while it is still frozen. Leave the
@@ -1879,21 +1856,31 @@ redo:
1879 } 1856 }
1880} 1857}
1881 1858
1882/* Unfreeze all the cpu partial slabs */ 1859/*
1860 * Unfreeze all the cpu partial slabs.
1861 *
1862 * This function must be called with interrupt disabled.
1863 */
1883static void unfreeze_partials(struct kmem_cache *s) 1864static void unfreeze_partials(struct kmem_cache *s)
1884{ 1865{
1885 struct kmem_cache_node *n = NULL; 1866 struct kmem_cache_node *n = NULL, *n2 = NULL;
1886 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); 1867 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
1887 struct page *page, *discard_page = NULL; 1868 struct page *page, *discard_page = NULL;
1888 1869
1889 while ((page = c->partial)) { 1870 while ((page = c->partial)) {
1890 enum slab_modes { M_PARTIAL, M_FREE };
1891 enum slab_modes l, m;
1892 struct page new; 1871 struct page new;
1893 struct page old; 1872 struct page old;
1894 1873
1895 c->partial = page->next; 1874 c->partial = page->next;
1896 l = M_FREE; 1875
1876 n2 = get_node(s, page_to_nid(page));
1877 if (n != n2) {
1878 if (n)
1879 spin_unlock(&n->list_lock);
1880
1881 n = n2;
1882 spin_lock(&n->list_lock);
1883 }
1897 1884
1898 do { 1885 do {
1899 1886
@@ -1906,43 +1893,17 @@ static void unfreeze_partials(struct kmem_cache *s)
1906 1893
1907 new.frozen = 0; 1894 new.frozen = 0;
1908 1895
1909 if (!new.inuse && (!n || n->nr_partial > s->min_partial)) 1896 } while (!__cmpxchg_double_slab(s, page,
1910 m = M_FREE;
1911 else {
1912 struct kmem_cache_node *n2 = get_node(s,
1913 page_to_nid(page));
1914
1915 m = M_PARTIAL;
1916 if (n != n2) {
1917 if (n)
1918 spin_unlock(&n->list_lock);
1919
1920 n = n2;
1921 spin_lock(&n->list_lock);
1922 }
1923 }
1924
1925 if (l != m) {
1926 if (l == M_PARTIAL) {
1927 remove_partial(n, page);
1928 stat(s, FREE_REMOVE_PARTIAL);
1929 } else {
1930 add_partial(n, page,
1931 DEACTIVATE_TO_TAIL);
1932 stat(s, FREE_ADD_PARTIAL);
1933 }
1934
1935 l = m;
1936 }
1937
1938 } while (!cmpxchg_double_slab(s, page,
1939 old.freelist, old.counters, 1897 old.freelist, old.counters,
1940 new.freelist, new.counters, 1898 new.freelist, new.counters,
1941 "unfreezing slab")); 1899 "unfreezing slab"));
1942 1900
1943 if (m == M_FREE) { 1901 if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
1944 page->next = discard_page; 1902 page->next = discard_page;
1945 discard_page = page; 1903 discard_page = page;
1904 } else {
1905 add_partial(n, page, DEACTIVATE_TO_TAIL);
1906 stat(s, FREE_ADD_PARTIAL);
1946 } 1907 }
1947 } 1908 }
1948 1909
@@ -2011,7 +1972,11 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2011static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1972static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2012{ 1973{
2013 stat(s, CPUSLAB_FLUSH); 1974 stat(s, CPUSLAB_FLUSH);
2014 deactivate_slab(s, c); 1975 deactivate_slab(s, c->page, c->freelist);
1976
1977 c->tid = next_tid(c->tid);
1978 c->page = NULL;
1979 c->freelist = NULL;
2015} 1980}
2016 1981
2017/* 1982/*
@@ -2055,10 +2020,10 @@ static void flush_all(struct kmem_cache *s)
2055 * Check if the objects in a per cpu structure fit numa 2020 * Check if the objects in a per cpu structure fit numa
2056 * locality expectations. 2021 * locality expectations.
2057 */ 2022 */
2058static inline int node_match(struct kmem_cache_cpu *c, int node) 2023static inline int node_match(struct page *page, int node)
2059{ 2024{
2060#ifdef CONFIG_NUMA 2025#ifdef CONFIG_NUMA
2061 if (node != NUMA_NO_NODE && c->node != node) 2026 if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2062 return 0; 2027 return 0;
2063#endif 2028#endif
2064 return 1; 2029 return 1;
@@ -2101,10 +2066,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2101 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", 2066 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2102 nid, gfpflags); 2067 nid, gfpflags);
2103 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " 2068 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2104 "default order: %d, min order: %d\n", s->name, s->objsize, 2069 "default order: %d, min order: %d\n", s->name, s->object_size,
2105 s->size, oo_order(s->oo), oo_order(s->min)); 2070 s->size, oo_order(s->oo), oo_order(s->min));
2106 2071
2107 if (oo_order(s->min) > get_order(s->objsize)) 2072 if (oo_order(s->min) > get_order(s->object_size))
2108 printk(KERN_WARNING " %s debugging increased min order, use " 2073 printk(KERN_WARNING " %s debugging increased min order, use "
2109 "slub_debug=O to disable.\n", s->name); 2074 "slub_debug=O to disable.\n", s->name);
2110 2075
@@ -2130,10 +2095,16 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2130static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, 2095static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2131 int node, struct kmem_cache_cpu **pc) 2096 int node, struct kmem_cache_cpu **pc)
2132{ 2097{
2133 void *object; 2098 void *freelist;
2134 struct kmem_cache_cpu *c; 2099 struct kmem_cache_cpu *c = *pc;
2135 struct page *page = new_slab(s, flags, node); 2100 struct page *page;
2101
2102 freelist = get_partial(s, flags, node, c);
2136 2103
2104 if (freelist)
2105 return freelist;
2106
2107 page = new_slab(s, flags, node);
2137 if (page) { 2108 if (page) {
2138 c = __this_cpu_ptr(s->cpu_slab); 2109 c = __this_cpu_ptr(s->cpu_slab);
2139 if (c->page) 2110 if (c->page)
@@ -2143,17 +2114,16 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2143 * No other reference to the page yet so we can 2114 * No other reference to the page yet so we can
2144 * muck around with it freely without cmpxchg 2115 * muck around with it freely without cmpxchg
2145 */ 2116 */
2146 object = page->freelist; 2117 freelist = page->freelist;
2147 page->freelist = NULL; 2118 page->freelist = NULL;
2148 2119
2149 stat(s, ALLOC_SLAB); 2120 stat(s, ALLOC_SLAB);
2150 c->node = page_to_nid(page);
2151 c->page = page; 2121 c->page = page;
2152 *pc = c; 2122 *pc = c;
2153 } else 2123 } else
2154 object = NULL; 2124 freelist = NULL;
2155 2125
2156 return object; 2126 return freelist;
2157} 2127}
2158 2128
2159/* 2129/*
@@ -2163,6 +2133,8 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2163 * The page is still frozen if the return value is not NULL. 2133 * The page is still frozen if the return value is not NULL.
2164 * 2134 *
2165 * If this function returns NULL then the page has been unfrozen. 2135 * If this function returns NULL then the page has been unfrozen.
2136 *
2137 * This function must be called with interrupt disabled.
2166 */ 2138 */
2167static inline void *get_freelist(struct kmem_cache *s, struct page *page) 2139static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2168{ 2140{
@@ -2173,13 +2145,14 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2173 do { 2145 do {
2174 freelist = page->freelist; 2146 freelist = page->freelist;
2175 counters = page->counters; 2147 counters = page->counters;
2148
2176 new.counters = counters; 2149 new.counters = counters;
2177 VM_BUG_ON(!new.frozen); 2150 VM_BUG_ON(!new.frozen);
2178 2151
2179 new.inuse = page->objects; 2152 new.inuse = page->objects;
2180 new.frozen = freelist != NULL; 2153 new.frozen = freelist != NULL;
2181 2154
2182 } while (!cmpxchg_double_slab(s, page, 2155 } while (!__cmpxchg_double_slab(s, page,
2183 freelist, counters, 2156 freelist, counters,
2184 NULL, new.counters, 2157 NULL, new.counters,
2185 "get_freelist")); 2158 "get_freelist"));
@@ -2206,7 +2179,8 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2206static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, 2179static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2207 unsigned long addr, struct kmem_cache_cpu *c) 2180 unsigned long addr, struct kmem_cache_cpu *c)
2208{ 2181{
2209 void **object; 2182 void *freelist;
2183 struct page *page;
2210 unsigned long flags; 2184 unsigned long flags;
2211 2185
2212 local_irq_save(flags); 2186 local_irq_save(flags);
@@ -2219,25 +2193,29 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2219 c = this_cpu_ptr(s->cpu_slab); 2193 c = this_cpu_ptr(s->cpu_slab);
2220#endif 2194#endif
2221 2195
2222 if (!c->page) 2196 page = c->page;
2197 if (!page)
2223 goto new_slab; 2198 goto new_slab;
2224redo: 2199redo:
2225 if (unlikely(!node_match(c, node))) { 2200
2201 if (unlikely(!node_match(page, node))) {
2226 stat(s, ALLOC_NODE_MISMATCH); 2202 stat(s, ALLOC_NODE_MISMATCH);
2227 deactivate_slab(s, c); 2203 deactivate_slab(s, page, c->freelist);
2204 c->page = NULL;
2205 c->freelist = NULL;
2228 goto new_slab; 2206 goto new_slab;
2229 } 2207 }
2230 2208
2231 /* must check again c->freelist in case of cpu migration or IRQ */ 2209 /* must check again c->freelist in case of cpu migration or IRQ */
2232 object = c->freelist; 2210 freelist = c->freelist;
2233 if (object) 2211 if (freelist)
2234 goto load_freelist; 2212 goto load_freelist;
2235 2213
2236 stat(s, ALLOC_SLOWPATH); 2214 stat(s, ALLOC_SLOWPATH);
2237 2215
2238 object = get_freelist(s, c->page); 2216 freelist = get_freelist(s, page);
2239 2217
2240 if (!object) { 2218 if (!freelist) {
2241 c->page = NULL; 2219 c->page = NULL;
2242 stat(s, DEACTIVATE_BYPASS); 2220 stat(s, DEACTIVATE_BYPASS);
2243 goto new_slab; 2221 goto new_slab;
@@ -2246,50 +2224,50 @@ redo:
2246 stat(s, ALLOC_REFILL); 2224 stat(s, ALLOC_REFILL);
2247 2225
2248load_freelist: 2226load_freelist:
2249 c->freelist = get_freepointer(s, object); 2227 /*
2228 * freelist is pointing to the list of objects to be used.
2229 * page is pointing to the page from which the objects are obtained.
2230 * That page must be frozen for per cpu allocations to work.
2231 */
2232 VM_BUG_ON(!c->page->frozen);
2233 c->freelist = get_freepointer(s, freelist);
2250 c->tid = next_tid(c->tid); 2234 c->tid = next_tid(c->tid);
2251 local_irq_restore(flags); 2235 local_irq_restore(flags);
2252 return object; 2236 return freelist;
2253 2237
2254new_slab: 2238new_slab:
2255 2239
2256 if (c->partial) { 2240 if (c->partial) {
2257 c->page = c->partial; 2241 page = c->page = c->partial;
2258 c->partial = c->page->next; 2242 c->partial = page->next;
2259 c->node = page_to_nid(c->page);
2260 stat(s, CPU_PARTIAL_ALLOC); 2243 stat(s, CPU_PARTIAL_ALLOC);
2261 c->freelist = NULL; 2244 c->freelist = NULL;
2262 goto redo; 2245 goto redo;
2263 } 2246 }
2264 2247
2265 /* Then do expensive stuff like retrieving pages from the partial lists */ 2248 freelist = new_slab_objects(s, gfpflags, node, &c);
2266 object = get_partial(s, gfpflags, node, c);
2267 2249
2268 if (unlikely(!object)) { 2250 if (unlikely(!freelist)) {
2251 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2252 slab_out_of_memory(s, gfpflags, node);
2269 2253
2270 object = new_slab_objects(s, gfpflags, node, &c); 2254 local_irq_restore(flags);
2271 2255 return NULL;
2272 if (unlikely(!object)) {
2273 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2274 slab_out_of_memory(s, gfpflags, node);
2275
2276 local_irq_restore(flags);
2277 return NULL;
2278 }
2279 } 2256 }
2280 2257
2258 page = c->page;
2281 if (likely(!kmem_cache_debug(s))) 2259 if (likely(!kmem_cache_debug(s)))
2282 goto load_freelist; 2260 goto load_freelist;
2283 2261
2284 /* Only entered in the debug case */ 2262 /* Only entered in the debug case */
2285 if (!alloc_debug_processing(s, c->page, object, addr)) 2263 if (!alloc_debug_processing(s, page, freelist, addr))
2286 goto new_slab; /* Slab failed checks. Next slab needed */ 2264 goto new_slab; /* Slab failed checks. Next slab needed */
2287 2265
2288 c->freelist = get_freepointer(s, object); 2266 deactivate_slab(s, page, get_freepointer(s, freelist));
2289 deactivate_slab(s, c); 2267 c->page = NULL;
2290 c->node = NUMA_NO_NODE; 2268 c->freelist = NULL;
2291 local_irq_restore(flags); 2269 local_irq_restore(flags);
2292 return object; 2270 return freelist;
2293} 2271}
2294 2272
2295/* 2273/*
@@ -2307,6 +2285,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
2307{ 2285{
2308 void **object; 2286 void **object;
2309 struct kmem_cache_cpu *c; 2287 struct kmem_cache_cpu *c;
2288 struct page *page;
2310 unsigned long tid; 2289 unsigned long tid;
2311 2290
2312 if (slab_pre_alloc_hook(s, gfpflags)) 2291 if (slab_pre_alloc_hook(s, gfpflags))
@@ -2332,7 +2311,8 @@ redo:
2332 barrier(); 2311 barrier();
2333 2312
2334 object = c->freelist; 2313 object = c->freelist;
2335 if (unlikely(!object || !node_match(c, node))) 2314 page = c->page;
2315 if (unlikely(!object || !node_match(page, node)))
2336 2316
2337 object = __slab_alloc(s, gfpflags, node, addr, c); 2317 object = __slab_alloc(s, gfpflags, node, addr, c);
2338 2318
@@ -2364,7 +2344,7 @@ redo:
2364 } 2344 }
2365 2345
2366 if (unlikely(gfpflags & __GFP_ZERO) && object) 2346 if (unlikely(gfpflags & __GFP_ZERO) && object)
2367 memset(object, 0, s->objsize); 2347 memset(object, 0, s->object_size);
2368 2348
2369 slab_post_alloc_hook(s, gfpflags, object); 2349 slab_post_alloc_hook(s, gfpflags, object);
2370 2350
@@ -2375,7 +2355,7 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2375{ 2355{
2376 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); 2356 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2377 2357
2378 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); 2358 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags);
2379 2359
2380 return ret; 2360 return ret;
2381} 2361}
@@ -2405,7 +2385,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2405 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); 2385 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2406 2386
2407 trace_kmem_cache_alloc_node(_RET_IP_, ret, 2387 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2408 s->objsize, s->size, gfpflags, node); 2388 s->object_size, s->size, gfpflags, node);
2409 2389
2410 return ret; 2390 return ret;
2411} 2391}
@@ -2900,7 +2880,7 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min)
2900static int calculate_sizes(struct kmem_cache *s, int forced_order) 2880static int calculate_sizes(struct kmem_cache *s, int forced_order)
2901{ 2881{
2902 unsigned long flags = s->flags; 2882 unsigned long flags = s->flags;
2903 unsigned long size = s->objsize; 2883 unsigned long size = s->object_size;
2904 unsigned long align = s->align; 2884 unsigned long align = s->align;
2905 int order; 2885 int order;
2906 2886
@@ -2929,7 +2909,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
2929 * end of the object and the free pointer. If not then add an 2909 * end of the object and the free pointer. If not then add an
2930 * additional word to have some bytes to store Redzone information. 2910 * additional word to have some bytes to store Redzone information.
2931 */ 2911 */
2932 if ((flags & SLAB_RED_ZONE) && size == s->objsize) 2912 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
2933 size += sizeof(void *); 2913 size += sizeof(void *);
2934#endif 2914#endif
2935 2915
@@ -2977,7 +2957,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
2977 * user specified and the dynamic determination of cache line size 2957 * user specified and the dynamic determination of cache line size
2978 * on bootup. 2958 * on bootup.
2979 */ 2959 */
2980 align = calculate_alignment(flags, align, s->objsize); 2960 align = calculate_alignment(flags, align, s->object_size);
2981 s->align = align; 2961 s->align = align;
2982 2962
2983 /* 2963 /*
@@ -3025,7 +3005,7 @@ static int kmem_cache_open(struct kmem_cache *s,
3025 memset(s, 0, kmem_size); 3005 memset(s, 0, kmem_size);
3026 s->name = name; 3006 s->name = name;
3027 s->ctor = ctor; 3007 s->ctor = ctor;
3028 s->objsize = size; 3008 s->object_size = size;
3029 s->align = align; 3009 s->align = align;
3030 s->flags = kmem_cache_flags(size, flags, name, ctor); 3010 s->flags = kmem_cache_flags(size, flags, name, ctor);
3031 s->reserved = 0; 3011 s->reserved = 0;
@@ -3040,7 +3020,7 @@ static int kmem_cache_open(struct kmem_cache *s,
3040 * Disable debugging flags that store metadata if the min slab 3020 * Disable debugging flags that store metadata if the min slab
3041 * order increased. 3021 * order increased.
3042 */ 3022 */
3043 if (get_order(s->size) > get_order(s->objsize)) { 3023 if (get_order(s->size) > get_order(s->object_size)) {
3044 s->flags &= ~DEBUG_METADATA_FLAGS; 3024 s->flags &= ~DEBUG_METADATA_FLAGS;
3045 s->offset = 0; 3025 s->offset = 0;
3046 if (!calculate_sizes(s, -1)) 3026 if (!calculate_sizes(s, -1))
@@ -3114,7 +3094,7 @@ error:
3114 */ 3094 */
3115unsigned int kmem_cache_size(struct kmem_cache *s) 3095unsigned int kmem_cache_size(struct kmem_cache *s)
3116{ 3096{
3117 return s->objsize; 3097 return s->object_size;
3118} 3098}
3119EXPORT_SYMBOL(kmem_cache_size); 3099EXPORT_SYMBOL(kmem_cache_size);
3120 3100
@@ -3192,11 +3172,11 @@ static inline int kmem_cache_close(struct kmem_cache *s)
3192 */ 3172 */
3193void kmem_cache_destroy(struct kmem_cache *s) 3173void kmem_cache_destroy(struct kmem_cache *s)
3194{ 3174{
3195 down_write(&slub_lock); 3175 mutex_lock(&slab_mutex);
3196 s->refcount--; 3176 s->refcount--;
3197 if (!s->refcount) { 3177 if (!s->refcount) {
3198 list_del(&s->list); 3178 list_del(&s->list);
3199 up_write(&slub_lock); 3179 mutex_unlock(&slab_mutex);
3200 if (kmem_cache_close(s)) { 3180 if (kmem_cache_close(s)) {
3201 printk(KERN_ERR "SLUB %s: %s called for cache that " 3181 printk(KERN_ERR "SLUB %s: %s called for cache that "
3202 "still has objects.\n", s->name, __func__); 3182 "still has objects.\n", s->name, __func__);
@@ -3206,7 +3186,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
3206 rcu_barrier(); 3186 rcu_barrier();
3207 sysfs_slab_remove(s); 3187 sysfs_slab_remove(s);
3208 } else 3188 } else
3209 up_write(&slub_lock); 3189 mutex_unlock(&slab_mutex);
3210} 3190}
3211EXPORT_SYMBOL(kmem_cache_destroy); 3191EXPORT_SYMBOL(kmem_cache_destroy);
3212 3192
@@ -3268,7 +3248,7 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name,
3268 3248
3269 /* 3249 /*
3270 * This function is called with IRQs disabled during early-boot on 3250 * This function is called with IRQs disabled during early-boot on
3271 * single CPU so there's no need to take slub_lock here. 3251 * single CPU so there's no need to take slab_mutex here.
3272 */ 3252 */
3273 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, 3253 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
3274 flags, NULL)) 3254 flags, NULL))
@@ -3553,10 +3533,10 @@ static int slab_mem_going_offline_callback(void *arg)
3553{ 3533{
3554 struct kmem_cache *s; 3534 struct kmem_cache *s;
3555 3535
3556 down_read(&slub_lock); 3536 mutex_lock(&slab_mutex);
3557 list_for_each_entry(s, &slab_caches, list) 3537 list_for_each_entry(s, &slab_caches, list)
3558 kmem_cache_shrink(s); 3538 kmem_cache_shrink(s);
3559 up_read(&slub_lock); 3539 mutex_unlock(&slab_mutex);
3560 3540
3561 return 0; 3541 return 0;
3562} 3542}
@@ -3577,7 +3557,7 @@ static void slab_mem_offline_callback(void *arg)
3577 if (offline_node < 0) 3557 if (offline_node < 0)
3578 return; 3558 return;
3579 3559
3580 down_read(&slub_lock); 3560 mutex_lock(&slab_mutex);
3581 list_for_each_entry(s, &slab_caches, list) { 3561 list_for_each_entry(s, &slab_caches, list) {
3582 n = get_node(s, offline_node); 3562 n = get_node(s, offline_node);
3583 if (n) { 3563 if (n) {
@@ -3593,7 +3573,7 @@ static void slab_mem_offline_callback(void *arg)
3593 kmem_cache_free(kmem_cache_node, n); 3573 kmem_cache_free(kmem_cache_node, n);
3594 } 3574 }
3595 } 3575 }
3596 up_read(&slub_lock); 3576 mutex_unlock(&slab_mutex);
3597} 3577}
3598 3578
3599static int slab_mem_going_online_callback(void *arg) 3579static int slab_mem_going_online_callback(void *arg)
@@ -3616,7 +3596,7 @@ static int slab_mem_going_online_callback(void *arg)
3616 * allocate a kmem_cache_node structure in order to bring the node 3596 * allocate a kmem_cache_node structure in order to bring the node
3617 * online. 3597 * online.
3618 */ 3598 */
3619 down_read(&slub_lock); 3599 mutex_lock(&slab_mutex);
3620 list_for_each_entry(s, &slab_caches, list) { 3600 list_for_each_entry(s, &slab_caches, list) {
3621 /* 3601 /*
3622 * XXX: kmem_cache_alloc_node will fallback to other nodes 3602 * XXX: kmem_cache_alloc_node will fallback to other nodes
@@ -3632,7 +3612,7 @@ static int slab_mem_going_online_callback(void *arg)
3632 s->node[nid] = n; 3612 s->node[nid] = n;
3633 } 3613 }
3634out: 3614out:
3635 up_read(&slub_lock); 3615 mutex_unlock(&slab_mutex);
3636 return ret; 3616 return ret;
3637} 3617}
3638 3618
@@ -3843,11 +3823,11 @@ void __init kmem_cache_init(void)
3843 3823
3844 if (s && s->size) { 3824 if (s && s->size) {
3845 char *name = kasprintf(GFP_NOWAIT, 3825 char *name = kasprintf(GFP_NOWAIT,
3846 "dma-kmalloc-%d", s->objsize); 3826 "dma-kmalloc-%d", s->object_size);
3847 3827
3848 BUG_ON(!name); 3828 BUG_ON(!name);
3849 kmalloc_dma_caches[i] = create_kmalloc_cache(name, 3829 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3850 s->objsize, SLAB_CACHE_DMA); 3830 s->object_size, SLAB_CACHE_DMA);
3851 } 3831 }
3852 } 3832 }
3853#endif 3833#endif
@@ -3924,16 +3904,12 @@ static struct kmem_cache *find_mergeable(size_t size,
3924 return NULL; 3904 return NULL;
3925} 3905}
3926 3906
3927struct kmem_cache *kmem_cache_create(const char *name, size_t size, 3907struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
3928 size_t align, unsigned long flags, void (*ctor)(void *)) 3908 size_t align, unsigned long flags, void (*ctor)(void *))
3929{ 3909{
3930 struct kmem_cache *s; 3910 struct kmem_cache *s;
3931 char *n; 3911 char *n;
3932 3912
3933 if (WARN_ON(!name))
3934 return NULL;
3935
3936 down_write(&slub_lock);
3937 s = find_mergeable(size, align, flags, name, ctor); 3913 s = find_mergeable(size, align, flags, name, ctor);
3938 if (s) { 3914 if (s) {
3939 s->refcount++; 3915 s->refcount++;
@@ -3941,49 +3917,42 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3941 * Adjust the object sizes so that we clear 3917 * Adjust the object sizes so that we clear
3942 * the complete object on kzalloc. 3918 * the complete object on kzalloc.
3943 */ 3919 */
3944 s->objsize = max(s->objsize, (int)size); 3920 s->object_size = max(s->object_size, (int)size);
3945 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3921 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3946 3922
3947 if (sysfs_slab_alias(s, name)) { 3923 if (sysfs_slab_alias(s, name)) {
3948 s->refcount--; 3924 s->refcount--;
3949 goto err; 3925 return NULL;
3950 } 3926 }
3951 up_write(&slub_lock);
3952 return s; 3927 return s;
3953 } 3928 }
3954 3929
3955 n = kstrdup(name, GFP_KERNEL); 3930 n = kstrdup(name, GFP_KERNEL);
3956 if (!n) 3931 if (!n)
3957 goto err; 3932 return NULL;
3958 3933
3959 s = kmalloc(kmem_size, GFP_KERNEL); 3934 s = kmalloc(kmem_size, GFP_KERNEL);
3960 if (s) { 3935 if (s) {
3961 if (kmem_cache_open(s, n, 3936 if (kmem_cache_open(s, n,
3962 size, align, flags, ctor)) { 3937 size, align, flags, ctor)) {
3938 int r;
3939
3963 list_add(&s->list, &slab_caches); 3940 list_add(&s->list, &slab_caches);
3964 up_write(&slub_lock); 3941 mutex_unlock(&slab_mutex);
3965 if (sysfs_slab_add(s)) { 3942 r = sysfs_slab_add(s);
3966 down_write(&slub_lock); 3943 mutex_lock(&slab_mutex);
3967 list_del(&s->list); 3944
3968 kfree(n); 3945 if (!r)
3969 kfree(s); 3946 return s;
3970 goto err; 3947
3971 } 3948 list_del(&s->list);
3972 return s; 3949 kmem_cache_close(s);
3973 } 3950 }
3974 kfree(s); 3951 kfree(s);
3975 } 3952 }
3976 kfree(n); 3953 kfree(n);
3977err: 3954 return NULL;
3978 up_write(&slub_lock);
3979
3980 if (flags & SLAB_PANIC)
3981 panic("Cannot create slabcache %s\n", name);
3982 else
3983 s = NULL;
3984 return s;
3985} 3955}
3986EXPORT_SYMBOL(kmem_cache_create);
3987 3956
3988#ifdef CONFIG_SMP 3957#ifdef CONFIG_SMP
3989/* 3958/*
@@ -4002,13 +3971,13 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
4002 case CPU_UP_CANCELED_FROZEN: 3971 case CPU_UP_CANCELED_FROZEN:
4003 case CPU_DEAD: 3972 case CPU_DEAD:
4004 case CPU_DEAD_FROZEN: 3973 case CPU_DEAD_FROZEN:
4005 down_read(&slub_lock); 3974 mutex_lock(&slab_mutex);
4006 list_for_each_entry(s, &slab_caches, list) { 3975 list_for_each_entry(s, &slab_caches, list) {
4007 local_irq_save(flags); 3976 local_irq_save(flags);
4008 __flush_cpu_slab(s, cpu); 3977 __flush_cpu_slab(s, cpu);
4009 local_irq_restore(flags); 3978 local_irq_restore(flags);
4010 } 3979 }
4011 up_read(&slub_lock); 3980 mutex_unlock(&slab_mutex);
4012 break; 3981 break;
4013 default: 3982 default:
4014 break; 3983 break;
@@ -4500,30 +4469,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4500 4469
4501 for_each_possible_cpu(cpu) { 4470 for_each_possible_cpu(cpu) {
4502 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 4471 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4503 int node = ACCESS_ONCE(c->node); 4472 int node;
4504 struct page *page; 4473 struct page *page;
4505 4474
4506 if (node < 0)
4507 continue;
4508 page = ACCESS_ONCE(c->page); 4475 page = ACCESS_ONCE(c->page);
4509 if (page) { 4476 if (!page)
4510 if (flags & SO_TOTAL) 4477 continue;
4511 x = page->objects;
4512 else if (flags & SO_OBJECTS)
4513 x = page->inuse;
4514 else
4515 x = 1;
4516 4478
4517 total += x; 4479 node = page_to_nid(page);
4518 nodes[node] += x; 4480 if (flags & SO_TOTAL)
4519 } 4481 x = page->objects;
4520 page = c->partial; 4482 else if (flags & SO_OBJECTS)
4483 x = page->inuse;
4484 else
4485 x = 1;
4521 4486
4487 total += x;
4488 nodes[node] += x;
4489
4490 page = ACCESS_ONCE(c->partial);
4522 if (page) { 4491 if (page) {
4523 x = page->pobjects; 4492 x = page->pobjects;
4524 total += x; 4493 total += x;
4525 nodes[node] += x; 4494 nodes[node] += x;
4526 } 4495 }
4496
4527 per_cpu[node]++; 4497 per_cpu[node]++;
4528 } 4498 }
4529 } 4499 }
@@ -4623,7 +4593,7 @@ SLAB_ATTR_RO(align);
4623 4593
4624static ssize_t object_size_show(struct kmem_cache *s, char *buf) 4594static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4625{ 4595{
4626 return sprintf(buf, "%d\n", s->objsize); 4596 return sprintf(buf, "%d\n", s->object_size);
4627} 4597}
4628SLAB_ATTR_RO(object_size); 4598SLAB_ATTR_RO(object_size);
4629 4599
@@ -5286,7 +5256,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
5286 const char *name; 5256 const char *name;
5287 int unmergeable; 5257 int unmergeable;
5288 5258
5289 if (slab_state < SYSFS) 5259 if (slab_state < FULL)
5290 /* Defer until later */ 5260 /* Defer until later */
5291 return 0; 5261 return 0;
5292 5262
@@ -5331,7 +5301,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
5331 5301
5332static void sysfs_slab_remove(struct kmem_cache *s) 5302static void sysfs_slab_remove(struct kmem_cache *s)
5333{ 5303{
5334 if (slab_state < SYSFS) 5304 if (slab_state < FULL)
5335 /* 5305 /*
5336 * Sysfs has not been setup yet so no need to remove the 5306 * Sysfs has not been setup yet so no need to remove the
5337 * cache from sysfs. 5307 * cache from sysfs.
@@ -5359,7 +5329,7 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5359{ 5329{
5360 struct saved_alias *al; 5330 struct saved_alias *al;
5361 5331
5362 if (slab_state == SYSFS) { 5332 if (slab_state == FULL) {
5363 /* 5333 /*
5364 * If we have a leftover link then remove it. 5334 * If we have a leftover link then remove it.
5365 */ 5335 */
@@ -5383,16 +5353,16 @@ static int __init slab_sysfs_init(void)
5383 struct kmem_cache *s; 5353 struct kmem_cache *s;
5384 int err; 5354 int err;
5385 5355
5386 down_write(&slub_lock); 5356 mutex_lock(&slab_mutex);
5387 5357
5388 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); 5358 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5389 if (!slab_kset) { 5359 if (!slab_kset) {
5390 up_write(&slub_lock); 5360 mutex_unlock(&slab_mutex);
5391 printk(KERN_ERR "Cannot register slab subsystem.\n"); 5361 printk(KERN_ERR "Cannot register slab subsystem.\n");
5392 return -ENOSYS; 5362 return -ENOSYS;
5393 } 5363 }
5394 5364
5395 slab_state = SYSFS; 5365 slab_state = FULL;
5396 5366
5397 list_for_each_entry(s, &slab_caches, list) { 5367 list_for_each_entry(s, &slab_caches, list) {
5398 err = sysfs_slab_add(s); 5368 err = sysfs_slab_add(s);
@@ -5408,11 +5378,11 @@ static int __init slab_sysfs_init(void)
5408 err = sysfs_slab_alias(al->s, al->name); 5378 err = sysfs_slab_alias(al->s, al->name);
5409 if (err) 5379 if (err)
5410 printk(KERN_ERR "SLUB: Unable to add boot slab alias" 5380 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
5411 " %s to sysfs\n", s->name); 5381 " %s to sysfs\n", al->name);
5412 kfree(al); 5382 kfree(al);
5413 } 5383 }
5414 5384
5415 up_write(&slub_lock); 5385 mutex_unlock(&slab_mutex);
5416 resiliency_test(); 5386 resiliency_test();
5417 return 0; 5387 return 0;
5418} 5388}
@@ -5427,7 +5397,7 @@ __initcall(slab_sysfs_init);
5427static void print_slabinfo_header(struct seq_file *m) 5397static void print_slabinfo_header(struct seq_file *m)
5428{ 5398{
5429 seq_puts(m, "slabinfo - version: 2.1\n"); 5399 seq_puts(m, "slabinfo - version: 2.1\n");
5430 seq_puts(m, "# name <active_objs> <num_objs> <objsize> " 5400 seq_puts(m, "# name <active_objs> <num_objs> <object_size> "
5431 "<objperslab> <pagesperslab>"); 5401 "<objperslab> <pagesperslab>");
5432 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); 5402 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
5433 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); 5403 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
@@ -5438,7 +5408,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
5438{ 5408{
5439 loff_t n = *pos; 5409 loff_t n = *pos;
5440 5410
5441 down_read(&slub_lock); 5411 mutex_lock(&slab_mutex);
5442 if (!n) 5412 if (!n)
5443 print_slabinfo_header(m); 5413 print_slabinfo_header(m);
5444 5414
@@ -5452,7 +5422,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos)
5452 5422
5453static void s_stop(struct seq_file *m, void *p) 5423static void s_stop(struct seq_file *m, void *p)
5454{ 5424{
5455 up_read(&slub_lock); 5425 mutex_unlock(&slab_mutex);
5456} 5426}
5457 5427
5458static int s_show(struct seq_file *m, void *p) 5428static int s_show(struct seq_file *m, void *p)
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
index 164cbcf61106..808d5a9d5dcf 100644
--- a/tools/vm/slabinfo.c
+++ b/tools/vm/slabinfo.c
@@ -437,34 +437,34 @@ static void slab_stats(struct slabinfo *s)
437 printf("Fastpath %8lu %8lu %3lu %3lu\n", 437 printf("Fastpath %8lu %8lu %3lu %3lu\n",
438 s->alloc_fastpath, s->free_fastpath, 438 s->alloc_fastpath, s->free_fastpath,
439 s->alloc_fastpath * 100 / total_alloc, 439 s->alloc_fastpath * 100 / total_alloc,
440 s->free_fastpath * 100 / total_free); 440 total_free ? s->free_fastpath * 100 / total_free : 0);
441 printf("Slowpath %8lu %8lu %3lu %3lu\n", 441 printf("Slowpath %8lu %8lu %3lu %3lu\n",
442 total_alloc - s->alloc_fastpath, s->free_slowpath, 442 total_alloc - s->alloc_fastpath, s->free_slowpath,
443 (total_alloc - s->alloc_fastpath) * 100 / total_alloc, 443 (total_alloc - s->alloc_fastpath) * 100 / total_alloc,
444 s->free_slowpath * 100 / total_free); 444 total_free ? s->free_slowpath * 100 / total_free : 0);
445 printf("Page Alloc %8lu %8lu %3lu %3lu\n", 445 printf("Page Alloc %8lu %8lu %3lu %3lu\n",
446 s->alloc_slab, s->free_slab, 446 s->alloc_slab, s->free_slab,
447 s->alloc_slab * 100 / total_alloc, 447 s->alloc_slab * 100 / total_alloc,
448 s->free_slab * 100 / total_free); 448 total_free ? s->free_slab * 100 / total_free : 0);
449 printf("Add partial %8lu %8lu %3lu %3lu\n", 449 printf("Add partial %8lu %8lu %3lu %3lu\n",
450 s->deactivate_to_head + s->deactivate_to_tail, 450 s->deactivate_to_head + s->deactivate_to_tail,
451 s->free_add_partial, 451 s->free_add_partial,
452 (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc, 452 (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc,
453 s->free_add_partial * 100 / total_free); 453 total_free ? s->free_add_partial * 100 / total_free : 0);
454 printf("Remove partial %8lu %8lu %3lu %3lu\n", 454 printf("Remove partial %8lu %8lu %3lu %3lu\n",
455 s->alloc_from_partial, s->free_remove_partial, 455 s->alloc_from_partial, s->free_remove_partial,
456 s->alloc_from_partial * 100 / total_alloc, 456 s->alloc_from_partial * 100 / total_alloc,
457 s->free_remove_partial * 100 / total_free); 457 total_free ? s->free_remove_partial * 100 / total_free : 0);
458 458
459 printf("Cpu partial list %8lu %8lu %3lu %3lu\n", 459 printf("Cpu partial list %8lu %8lu %3lu %3lu\n",
460 s->cpu_partial_alloc, s->cpu_partial_free, 460 s->cpu_partial_alloc, s->cpu_partial_free,
461 s->cpu_partial_alloc * 100 / total_alloc, 461 s->cpu_partial_alloc * 100 / total_alloc,
462 s->cpu_partial_free * 100 / total_free); 462 total_free ? s->cpu_partial_free * 100 / total_free : 0);
463 463
464 printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", 464 printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n",
465 s->deactivate_remote_frees, s->free_frozen, 465 s->deactivate_remote_frees, s->free_frozen,
466 s->deactivate_remote_frees * 100 / total_alloc, 466 s->deactivate_remote_frees * 100 / total_alloc,
467 s->free_frozen * 100 / total_free); 467 total_free ? s->free_frozen * 100 / total_free : 0);
468 468
469 printf("Total %8lu %8lu\n\n", total_alloc, total_free); 469 printf("Total %8lu %8lu\n\n", total_alloc, total_free);
470 470