path: root/mm
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-30 14:32:24 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-30 14:32:24 -0400
commit	720d85075b7ed3617de8ca8d9097390e303e9f60 (patch)
tree	3ce3911aa3f948b94949440954503c9f1b10ee64 /mm
parent	637e49ae4f5b4a82b418dae8435e16132b298b7e (diff)
parent	73a1180e140d45cb9ef5fbab103d3bbfc4c84606 (diff)
Merge branch 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
Pull SLAB changes from Pekka Enberg:
 "Most of the changes included are from Christoph Lameter's "common
  slab" patch series that unifies common parts of SLUB, SLAB, and SLOB
  allocators. The unification is needed for Glauber Costa's "kmem
  memcg" work that will hopefully appear for v3.7.

  The rest of the changes are fixes and speedups by various people."

* 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux: (32 commits)
  mm: Fix build warning in kmem_cache_create()
  slob: Fix early boot kernel crash
  mm, slub: ensure irqs are enabled for kmemcheck
  mm, sl[aou]b: Move kmem_cache_create mutex handling to common code
  mm, sl[aou]b: Use a common mutex definition
  mm, sl[aou]b: Common definition for boot state of the slab allocators
  mm, sl[aou]b: Extract common code for kmem_cache_create()
  slub: remove invalid reference to list iterator variable
  mm: Fix signal SIGFPE in slabinfo.c.
  slab: move FULL state transition to an initcall
  slab: Fix a typo in commit 8c138b "slab: Get rid of obj_size macro"
  mm, slab: Build fix for recent kmem_cache changes
  slab: rename gfpflags to allocflags
  slub: refactoring unfreeze_partials()
  slub: use __cmpxchg_double_slab() at interrupt disabled place
  slab/mempolicy: always use local policy from interrupt context
  slab: Get rid of obj_size macro
  mm, sl[aou]b: Extract common fields from struct kmem_cache
  slab: Remove some accessors
  slab: Use page struct fields instead of casting
  ...
Diffstat (limited to 'mm')
-rw-r--r--	mm/Makefile	3
-rw-r--r--	mm/mempolicy.c	8
-rw-r--r--	mm/slab.c	406
-rw-r--r--	mm/slab.h	33
-rw-r--r--	mm/slab_common.c	120
-rw-r--r--	mm/slob.c	152
-rw-r--r--	mm/slub.c	436
7 files changed, 559 insertions(+), 599 deletions(-)
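The slab.c hunks below drop several definitions that used to be private to SLAB (the cache_chain list, cache_chain_mutex, and the g_cpucache_up boot-state enum) in favour of shared ones provided by the new mm/slab.h and mm/slab_common.c listed in the diffstat. Those two files are not shown in this excerpt; the following is only a sketch of the shared declarations, inferred from how the slab.c hunks use them (states other than the ones visible in this diff are not guessed).

	/* Sketch of shared declarations presumably provided by mm/slab.h;
	 * inferred from usage in the mm/slab.c hunks below, not copied from
	 * the (unshown) header itself. */
	enum slab_state {
		DOWN,			/* no slab functionality yet */
		PARTIAL_ARRAYCACHE,	/* SLAB: arraycache kmalloc size available */
		PARTIAL_L3,		/* SLAB: kmem_list3 kmalloc size available */
		UP,			/* caches usable, final tuning still pending */
		FULL,			/* everything operational */
	};

	extern enum slab_state slab_state;	/* replaces slab.c's g_cpucache_up */
	extern struct mutex slab_mutex;		/* replaces cache_chain_mutex */
	extern struct list_head slab_caches;	/* replaces cache_chain */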
diff --git a/mm/Makefile b/mm/Makefile
index 2e2fbbefb99f..8e81fe263c94 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -16,7 +16,8 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
16 readahead.o swap.o truncate.o vmscan.o shmem.o \ 16 readahead.o swap.o truncate.o vmscan.o shmem.o \
17 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ 17 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
18 page_isolation.o mm_init.o mmu_context.o percpu.o \ 18 page_isolation.o mm_init.o mmu_context.o percpu.o \
19 compaction.o $(mmu-y) 19 compaction.o slab_common.o $(mmu-y)
20
20obj-y += init-mm.o 21obj-y += init-mm.o
21 22
22ifdef CONFIG_NO_BOOTMEM 23ifdef CONFIG_NO_BOOTMEM
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1d771e4200d2..bd92431d4c49 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1602,8 +1602,14 @@ static unsigned interleave_nodes(struct mempolicy *policy)
1602 * task can change it's policy. The system default policy requires no 1602 * task can change it's policy. The system default policy requires no
1603 * such protection. 1603 * such protection.
1604 */ 1604 */
1605unsigned slab_node(struct mempolicy *policy) 1605unsigned slab_node(void)
1606{ 1606{
1607 struct mempolicy *policy;
1608
1609 if (in_interrupt())
1610 return numa_node_id();
1611
1612 policy = current->mempolicy;
1607 if (!policy || policy->flags & MPOL_F_LOCAL) 1613 if (!policy || policy->flags & MPOL_F_LOCAL)
1608 return numa_node_id(); 1614 return numa_node_id();
1609 1615
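The hunk above drops the mempolicy argument from slab_node() and makes it safe to call from interrupt context (the "slab/mempolicy: always use local policy from interrupt context" change in the commit list). Read as plain code, the visible part of the function now begins as follows; the per-mode node selection after these lines lies outside the hunk and is unchanged, so only a fragment is shown.

	/* Post-patch opening of slab_node(), reconstructed from the hunk above. */
	unsigned slab_node(void)
	{
		struct mempolicy *policy;

		if (in_interrupt())
			return numa_node_id();

		policy = current->mempolicy;
		if (!policy || policy->flags & MPOL_F_LOCAL)
			return numa_node_id();

		/* ... per-mode node selection continues here, unchanged and
		 * not shown in the hunk ... */
	}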
diff --git a/mm/slab.c b/mm/slab.c
index e901a36e2520..1fcf3ac94b6c 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -68,7 +68,7 @@
68 * Further notes from the original documentation: 68 * Further notes from the original documentation:
69 * 69 *
70 * 11 April '97. Started multi-threading - markhe 70 * 11 April '97. Started multi-threading - markhe
71 * The global cache-chain is protected by the mutex 'cache_chain_mutex'. 71 * The global cache-chain is protected by the mutex 'slab_mutex'.
72 * The sem is only needed when accessing/extending the cache-chain, which 72 * The sem is only needed when accessing/extending the cache-chain, which
73 * can never happen inside an interrupt (kmem_cache_create(), 73 * can never happen inside an interrupt (kmem_cache_create(),
74 * kmem_cache_shrink() and kmem_cache_reap()). 74 * kmem_cache_shrink() and kmem_cache_reap()).
@@ -87,6 +87,7 @@
87 */ 87 */
88 88
89#include <linux/slab.h> 89#include <linux/slab.h>
90#include "slab.h"
90#include <linux/mm.h> 91#include <linux/mm.h>
91#include <linux/poison.h> 92#include <linux/poison.h>
92#include <linux/swap.h> 93#include <linux/swap.h>
@@ -424,8 +425,8 @@ static void kmem_list3_init(struct kmem_list3 *parent)
424 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1: 425 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
425 * redzone word. 426 * redzone word.
426 * cachep->obj_offset: The real object. 427 * cachep->obj_offset: The real object.
427 * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] 428 * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
428 * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address 429 * cachep->size - 1* BYTES_PER_WORD: last caller address
429 * [BYTES_PER_WORD long] 430 * [BYTES_PER_WORD long]
430 */ 431 */
431static int obj_offset(struct kmem_cache *cachep) 432static int obj_offset(struct kmem_cache *cachep)
@@ -433,11 +434,6 @@ static int obj_offset(struct kmem_cache *cachep)
433 return cachep->obj_offset; 434 return cachep->obj_offset;
434} 435}
435 436
436static int obj_size(struct kmem_cache *cachep)
437{
438 return cachep->obj_size;
439}
440
441static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp) 437static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
442{ 438{
443 BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); 439 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
@@ -449,23 +445,22 @@ static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
449{ 445{
450 BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); 446 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
451 if (cachep->flags & SLAB_STORE_USER) 447 if (cachep->flags & SLAB_STORE_USER)
452 return (unsigned long long *)(objp + cachep->buffer_size - 448 return (unsigned long long *)(objp + cachep->size -
453 sizeof(unsigned long long) - 449 sizeof(unsigned long long) -
454 REDZONE_ALIGN); 450 REDZONE_ALIGN);
455 return (unsigned long long *) (objp + cachep->buffer_size - 451 return (unsigned long long *) (objp + cachep->size -
456 sizeof(unsigned long long)); 452 sizeof(unsigned long long));
457} 453}
458 454
459static void **dbg_userword(struct kmem_cache *cachep, void *objp) 455static void **dbg_userword(struct kmem_cache *cachep, void *objp)
460{ 456{
461 BUG_ON(!(cachep->flags & SLAB_STORE_USER)); 457 BUG_ON(!(cachep->flags & SLAB_STORE_USER));
462 return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD); 458 return (void **)(objp + cachep->size - BYTES_PER_WORD);
463} 459}
464 460
465#else 461#else
466 462
467#define obj_offset(x) 0 463#define obj_offset(x) 0
468#define obj_size(cachep) (cachep->buffer_size)
469#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) 464#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
470#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) 465#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
471#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;}) 466#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
@@ -475,7 +470,7 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
475#ifdef CONFIG_TRACING 470#ifdef CONFIG_TRACING
476size_t slab_buffer_size(struct kmem_cache *cachep) 471size_t slab_buffer_size(struct kmem_cache *cachep)
477{ 472{
478 return cachep->buffer_size; 473 return cachep->size;
479} 474}
480EXPORT_SYMBOL(slab_buffer_size); 475EXPORT_SYMBOL(slab_buffer_size);
481#endif 476#endif
@@ -489,56 +484,37 @@ EXPORT_SYMBOL(slab_buffer_size);
489static int slab_max_order = SLAB_MAX_ORDER_LO; 484static int slab_max_order = SLAB_MAX_ORDER_LO;
490static bool slab_max_order_set __initdata; 485static bool slab_max_order_set __initdata;
491 486
492/*
493 * Functions for storing/retrieving the cachep and or slab from the page
494 * allocator. These are used to find the slab an obj belongs to. With kfree(),
495 * these are used to find the cache which an obj belongs to.
496 */
497static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
498{
499 page->lru.next = (struct list_head *)cache;
500}
501
502static inline struct kmem_cache *page_get_cache(struct page *page) 487static inline struct kmem_cache *page_get_cache(struct page *page)
503{ 488{
504 page = compound_head(page); 489 page = compound_head(page);
505 BUG_ON(!PageSlab(page)); 490 BUG_ON(!PageSlab(page));
506 return (struct kmem_cache *)page->lru.next; 491 return page->slab_cache;
507}
508
509static inline void page_set_slab(struct page *page, struct slab *slab)
510{
511 page->lru.prev = (struct list_head *)slab;
512}
513
514static inline struct slab *page_get_slab(struct page *page)
515{
516 BUG_ON(!PageSlab(page));
517 return (struct slab *)page->lru.prev;
518} 492}
519 493
520static inline struct kmem_cache *virt_to_cache(const void *obj) 494static inline struct kmem_cache *virt_to_cache(const void *obj)
521{ 495{
522 struct page *page = virt_to_head_page(obj); 496 struct page *page = virt_to_head_page(obj);
523 return page_get_cache(page); 497 return page->slab_cache;
524} 498}
525 499
526static inline struct slab *virt_to_slab(const void *obj) 500static inline struct slab *virt_to_slab(const void *obj)
527{ 501{
528 struct page *page = virt_to_head_page(obj); 502 struct page *page = virt_to_head_page(obj);
529 return page_get_slab(page); 503
504 VM_BUG_ON(!PageSlab(page));
505 return page->slab_page;
530} 506}
531 507
532static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, 508static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
533 unsigned int idx) 509 unsigned int idx)
534{ 510{
535 return slab->s_mem + cache->buffer_size * idx; 511 return slab->s_mem + cache->size * idx;
536} 512}
537 513
538/* 514/*
539 * We want to avoid an expensive divide : (offset / cache->buffer_size) 515 * We want to avoid an expensive divide : (offset / cache->size)
540 * Using the fact that buffer_size is a constant for a particular cache, 516 * Using the fact that size is a constant for a particular cache,
541 * we can replace (offset / cache->buffer_size) by 517 * we can replace (offset / cache->size) by
542 * reciprocal_divide(offset, cache->reciprocal_buffer_size) 518 * reciprocal_divide(offset, cache->reciprocal_buffer_size)
543 */ 519 */
544static inline unsigned int obj_to_index(const struct kmem_cache *cache, 520static inline unsigned int obj_to_index(const struct kmem_cache *cache,
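The comment rewritten just above explains why obj_to_index() uses reciprocal_divide() with a precomputed cache->reciprocal_buffer_size instead of dividing by cache->size. The standalone program below only illustrates that multiply-and-shift trick; its reciprocal_value()/reciprocal_divide() are simplified stand-ins for the kernel helpers of the same names, not copies of them.

	#include <stdint.h>
	#include <stdio.h>

	/* Precompute R = ceil(2^32 / k) once; then n / k can be computed as
	 * (n * R) >> 32 for the operand ranges the slab code cares about. */
	static uint32_t reciprocal_value(uint32_t k)
	{
		return (uint32_t)(((1ULL << 32) + k - 1) / k);
	}

	static uint32_t reciprocal_divide(uint32_t n, uint32_t R)
	{
		return (uint32_t)(((uint64_t)n * R) >> 32);
	}

	int main(void)
	{
		uint32_t size = 192;			/* an object size ("cache->size") */
		uint32_t R = reciprocal_value(size);	/* "cache->reciprocal_buffer_size" */
		uint32_t offset = 7 * size;		/* offset of the 8th object in a slab */

		/* obj_to_index()-style lookup: object index without a runtime divide */
		printf("index = %u (expected 7)\n", reciprocal_divide(offset, R));
		return 0;
	}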
@@ -584,33 +560,12 @@ static struct kmem_cache cache_cache = {
584 .batchcount = 1, 560 .batchcount = 1,
585 .limit = BOOT_CPUCACHE_ENTRIES, 561 .limit = BOOT_CPUCACHE_ENTRIES,
586 .shared = 1, 562 .shared = 1,
587 .buffer_size = sizeof(struct kmem_cache), 563 .size = sizeof(struct kmem_cache),
588 .name = "kmem_cache", 564 .name = "kmem_cache",
589}; 565};
590 566
591#define BAD_ALIEN_MAGIC 0x01020304ul 567#define BAD_ALIEN_MAGIC 0x01020304ul
592 568
593/*
594 * chicken and egg problem: delay the per-cpu array allocation
595 * until the general caches are up.
596 */
597static enum {
598 NONE,
599 PARTIAL_AC,
600 PARTIAL_L3,
601 EARLY,
602 LATE,
603 FULL
604} g_cpucache_up;
605
606/*
607 * used by boot code to determine if it can use slab based allocator
608 */
609int slab_is_available(void)
610{
611 return g_cpucache_up >= EARLY;
612}
613
614#ifdef CONFIG_LOCKDEP 569#ifdef CONFIG_LOCKDEP
615 570
616/* 571/*
@@ -676,7 +631,7 @@ static void init_node_lock_keys(int q)
676{ 631{
677 struct cache_sizes *s = malloc_sizes; 632 struct cache_sizes *s = malloc_sizes;
678 633
679 if (g_cpucache_up < LATE) 634 if (slab_state < UP)
680 return; 635 return;
681 636
682 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { 637 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
@@ -716,12 +671,6 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
716} 671}
717#endif 672#endif
718 673
719/*
720 * Guard access to the cache-chain.
721 */
722static DEFINE_MUTEX(cache_chain_mutex);
723static struct list_head cache_chain;
724
725static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); 674static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
726 675
727static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) 676static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -1145,7 +1094,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1145 * When hotplugging memory or a cpu, existing nodelists are not replaced if 1094 * When hotplugging memory or a cpu, existing nodelists are not replaced if
1146 * already in use. 1095 * already in use.
1147 * 1096 *
1148 * Must hold cache_chain_mutex. 1097 * Must hold slab_mutex.
1149 */ 1098 */
1150static int init_cache_nodelists_node(int node) 1099static int init_cache_nodelists_node(int node)
1151{ 1100{
@@ -1153,7 +1102,7 @@ static int init_cache_nodelists_node(int node)
1153 struct kmem_list3 *l3; 1102 struct kmem_list3 *l3;
1154 const int memsize = sizeof(struct kmem_list3); 1103 const int memsize = sizeof(struct kmem_list3);
1155 1104
1156 list_for_each_entry(cachep, &cache_chain, next) { 1105 list_for_each_entry(cachep, &slab_caches, list) {
1157 /* 1106 /*
1158 * Set up the size64 kmemlist for cpu before we can 1107 * Set up the size64 kmemlist for cpu before we can
1159 * begin anything. Make sure some other cpu on this 1108 * begin anything. Make sure some other cpu on this
@@ -1169,7 +1118,7 @@ static int init_cache_nodelists_node(int node)
1169 1118
1170 /* 1119 /*
1171 * The l3s don't come and go as CPUs come and 1120 * The l3s don't come and go as CPUs come and
1172 * go. cache_chain_mutex is sufficient 1121 * go. slab_mutex is sufficient
1173 * protection here. 1122 * protection here.
1174 */ 1123 */
1175 cachep->nodelists[node] = l3; 1124 cachep->nodelists[node] = l3;
@@ -1191,7 +1140,7 @@ static void __cpuinit cpuup_canceled(long cpu)
1191 int node = cpu_to_mem(cpu); 1140 int node = cpu_to_mem(cpu);
1192 const struct cpumask *mask = cpumask_of_node(node); 1141 const struct cpumask *mask = cpumask_of_node(node);
1193 1142
1194 list_for_each_entry(cachep, &cache_chain, next) { 1143 list_for_each_entry(cachep, &slab_caches, list) {
1195 struct array_cache *nc; 1144 struct array_cache *nc;
1196 struct array_cache *shared; 1145 struct array_cache *shared;
1197 struct array_cache **alien; 1146 struct array_cache **alien;
@@ -1241,7 +1190,7 @@ free_array_cache:
1241 * the respective cache's slabs, now we can go ahead and 1190 * the respective cache's slabs, now we can go ahead and
1242 * shrink each nodelist to its limit. 1191 * shrink each nodelist to its limit.
1243 */ 1192 */
1244 list_for_each_entry(cachep, &cache_chain, next) { 1193 list_for_each_entry(cachep, &slab_caches, list) {
1245 l3 = cachep->nodelists[node]; 1194 l3 = cachep->nodelists[node];
1246 if (!l3) 1195 if (!l3)
1247 continue; 1196 continue;
@@ -1270,7 +1219,7 @@ static int __cpuinit cpuup_prepare(long cpu)
1270 * Now we can go ahead with allocating the shared arrays and 1219 * Now we can go ahead with allocating the shared arrays and
1271 * array caches 1220 * array caches
1272 */ 1221 */
1273 list_for_each_entry(cachep, &cache_chain, next) { 1222 list_for_each_entry(cachep, &slab_caches, list) {
1274 struct array_cache *nc; 1223 struct array_cache *nc;
1275 struct array_cache *shared = NULL; 1224 struct array_cache *shared = NULL;
1276 struct array_cache **alien = NULL; 1225 struct array_cache **alien = NULL;
@@ -1338,9 +1287,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1338 switch (action) { 1287 switch (action) {
1339 case CPU_UP_PREPARE: 1288 case CPU_UP_PREPARE:
1340 case CPU_UP_PREPARE_FROZEN: 1289 case CPU_UP_PREPARE_FROZEN:
1341 mutex_lock(&cache_chain_mutex); 1290 mutex_lock(&slab_mutex);
1342 err = cpuup_prepare(cpu); 1291 err = cpuup_prepare(cpu);
1343 mutex_unlock(&cache_chain_mutex); 1292 mutex_unlock(&slab_mutex);
1344 break; 1293 break;
1345 case CPU_ONLINE: 1294 case CPU_ONLINE:
1346 case CPU_ONLINE_FROZEN: 1295 case CPU_ONLINE_FROZEN:
@@ -1350,7 +1299,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1350 case CPU_DOWN_PREPARE: 1299 case CPU_DOWN_PREPARE:
1351 case CPU_DOWN_PREPARE_FROZEN: 1300 case CPU_DOWN_PREPARE_FROZEN:
1352 /* 1301 /*
1353 * Shutdown cache reaper. Note that the cache_chain_mutex is 1302 * Shutdown cache reaper. Note that the slab_mutex is
1354 * held so that if cache_reap() is invoked it cannot do 1303 * held so that if cache_reap() is invoked it cannot do
1355 * anything expensive but will only modify reap_work 1304 * anything expensive but will only modify reap_work
1356 * and reschedule the timer. 1305 * and reschedule the timer.
@@ -1377,9 +1326,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1377#endif 1326#endif
1378 case CPU_UP_CANCELED: 1327 case CPU_UP_CANCELED:
1379 case CPU_UP_CANCELED_FROZEN: 1328 case CPU_UP_CANCELED_FROZEN:
1380 mutex_lock(&cache_chain_mutex); 1329 mutex_lock(&slab_mutex);
1381 cpuup_canceled(cpu); 1330 cpuup_canceled(cpu);
1382 mutex_unlock(&cache_chain_mutex); 1331 mutex_unlock(&slab_mutex);
1383 break; 1332 break;
1384 } 1333 }
1385 return notifier_from_errno(err); 1334 return notifier_from_errno(err);
@@ -1395,14 +1344,14 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
1395 * Returns -EBUSY if all objects cannot be drained so that the node is not 1344 * Returns -EBUSY if all objects cannot be drained so that the node is not
1396 * removed. 1345 * removed.
1397 * 1346 *
1398 * Must hold cache_chain_mutex. 1347 * Must hold slab_mutex.
1399 */ 1348 */
1400static int __meminit drain_cache_nodelists_node(int node) 1349static int __meminit drain_cache_nodelists_node(int node)
1401{ 1350{
1402 struct kmem_cache *cachep; 1351 struct kmem_cache *cachep;
1403 int ret = 0; 1352 int ret = 0;
1404 1353
1405 list_for_each_entry(cachep, &cache_chain, next) { 1354 list_for_each_entry(cachep, &slab_caches, list) {
1406 struct kmem_list3 *l3; 1355 struct kmem_list3 *l3;
1407 1356
1408 l3 = cachep->nodelists[node]; 1357 l3 = cachep->nodelists[node];
@@ -1433,14 +1382,14 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
1433 1382
1434 switch (action) { 1383 switch (action) {
1435 case MEM_GOING_ONLINE: 1384 case MEM_GOING_ONLINE:
1436 mutex_lock(&cache_chain_mutex); 1385 mutex_lock(&slab_mutex);
1437 ret = init_cache_nodelists_node(nid); 1386 ret = init_cache_nodelists_node(nid);
1438 mutex_unlock(&cache_chain_mutex); 1387 mutex_unlock(&slab_mutex);
1439 break; 1388 break;
1440 case MEM_GOING_OFFLINE: 1389 case MEM_GOING_OFFLINE:
1441 mutex_lock(&cache_chain_mutex); 1390 mutex_lock(&slab_mutex);
1442 ret = drain_cache_nodelists_node(nid); 1391 ret = drain_cache_nodelists_node(nid);
1443 mutex_unlock(&cache_chain_mutex); 1392 mutex_unlock(&slab_mutex);
1444 break; 1393 break;
1445 case MEM_ONLINE: 1394 case MEM_ONLINE:
1446 case MEM_OFFLINE: 1395 case MEM_OFFLINE:
@@ -1544,8 +1493,8 @@ void __init kmem_cache_init(void)
1544 node = numa_mem_id(); 1493 node = numa_mem_id();
1545 1494
1546 /* 1) create the cache_cache */ 1495 /* 1) create the cache_cache */
1547 INIT_LIST_HEAD(&cache_chain); 1496 INIT_LIST_HEAD(&slab_caches);
1548 list_add(&cache_cache.next, &cache_chain); 1497 list_add(&cache_cache.list, &slab_caches);
1549 cache_cache.colour_off = cache_line_size(); 1498 cache_cache.colour_off = cache_line_size();
1550 cache_cache.array[smp_processor_id()] = &initarray_cache.cache; 1499 cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1551 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; 1500 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
@@ -1553,18 +1502,16 @@ void __init kmem_cache_init(void)
1553 /* 1502 /*
1554 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids 1503 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
1555 */ 1504 */
1556 cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + 1505 cache_cache.size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
1557 nr_node_ids * sizeof(struct kmem_list3 *); 1506 nr_node_ids * sizeof(struct kmem_list3 *);
1558#if DEBUG 1507 cache_cache.object_size = cache_cache.size;
1559 cache_cache.obj_size = cache_cache.buffer_size; 1508 cache_cache.size = ALIGN(cache_cache.size,
1560#endif
1561 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1562 cache_line_size()); 1509 cache_line_size());
1563 cache_cache.reciprocal_buffer_size = 1510 cache_cache.reciprocal_buffer_size =
1564 reciprocal_value(cache_cache.buffer_size); 1511 reciprocal_value(cache_cache.size);
1565 1512
1566 for (order = 0; order < MAX_ORDER; order++) { 1513 for (order = 0; order < MAX_ORDER; order++) {
1567 cache_estimate(order, cache_cache.buffer_size, 1514 cache_estimate(order, cache_cache.size,
1568 cache_line_size(), 0, &left_over, &cache_cache.num); 1515 cache_line_size(), 0, &left_over, &cache_cache.num);
1569 if (cache_cache.num) 1516 if (cache_cache.num)
1570 break; 1517 break;
@@ -1585,7 +1532,7 @@ void __init kmem_cache_init(void)
1585 * bug. 1532 * bug.
1586 */ 1533 */
1587 1534
1588 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, 1535 sizes[INDEX_AC].cs_cachep = __kmem_cache_create(names[INDEX_AC].name,
1589 sizes[INDEX_AC].cs_size, 1536 sizes[INDEX_AC].cs_size,
1590 ARCH_KMALLOC_MINALIGN, 1537 ARCH_KMALLOC_MINALIGN,
1591 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1538 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
@@ -1593,7 +1540,7 @@ void __init kmem_cache_init(void)
1593 1540
1594 if (INDEX_AC != INDEX_L3) { 1541 if (INDEX_AC != INDEX_L3) {
1595 sizes[INDEX_L3].cs_cachep = 1542 sizes[INDEX_L3].cs_cachep =
1596 kmem_cache_create(names[INDEX_L3].name, 1543 __kmem_cache_create(names[INDEX_L3].name,
1597 sizes[INDEX_L3].cs_size, 1544 sizes[INDEX_L3].cs_size,
1598 ARCH_KMALLOC_MINALIGN, 1545 ARCH_KMALLOC_MINALIGN,
1599 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1546 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
@@ -1611,14 +1558,14 @@ void __init kmem_cache_init(void)
1611 * allow tighter packing of the smaller caches. 1558 * allow tighter packing of the smaller caches.
1612 */ 1559 */
1613 if (!sizes->cs_cachep) { 1560 if (!sizes->cs_cachep) {
1614 sizes->cs_cachep = kmem_cache_create(names->name, 1561 sizes->cs_cachep = __kmem_cache_create(names->name,
1615 sizes->cs_size, 1562 sizes->cs_size,
1616 ARCH_KMALLOC_MINALIGN, 1563 ARCH_KMALLOC_MINALIGN,
1617 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1564 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1618 NULL); 1565 NULL);
1619 } 1566 }
1620#ifdef CONFIG_ZONE_DMA 1567#ifdef CONFIG_ZONE_DMA
1621 sizes->cs_dmacachep = kmem_cache_create( 1568 sizes->cs_dmacachep = __kmem_cache_create(
1622 names->name_dma, 1569 names->name_dma,
1623 sizes->cs_size, 1570 sizes->cs_size,
1624 ARCH_KMALLOC_MINALIGN, 1571 ARCH_KMALLOC_MINALIGN,
@@ -1676,27 +1623,27 @@ void __init kmem_cache_init(void)
1676 } 1623 }
1677 } 1624 }
1678 1625
1679 g_cpucache_up = EARLY; 1626 slab_state = UP;
1680} 1627}
1681 1628
1682void __init kmem_cache_init_late(void) 1629void __init kmem_cache_init_late(void)
1683{ 1630{
1684 struct kmem_cache *cachep; 1631 struct kmem_cache *cachep;
1685 1632
1686 g_cpucache_up = LATE; 1633 slab_state = UP;
1687 1634
1688 /* Annotate slab for lockdep -- annotate the malloc caches */ 1635 /* Annotate slab for lockdep -- annotate the malloc caches */
1689 init_lock_keys(); 1636 init_lock_keys();
1690 1637
1691 /* 6) resize the head arrays to their final sizes */ 1638 /* 6) resize the head arrays to their final sizes */
1692 mutex_lock(&cache_chain_mutex); 1639 mutex_lock(&slab_mutex);
1693 list_for_each_entry(cachep, &cache_chain, next) 1640 list_for_each_entry(cachep, &slab_caches, list)
1694 if (enable_cpucache(cachep, GFP_NOWAIT)) 1641 if (enable_cpucache(cachep, GFP_NOWAIT))
1695 BUG(); 1642 BUG();
1696 mutex_unlock(&cache_chain_mutex); 1643 mutex_unlock(&slab_mutex);
1697 1644
1698 /* Done! */ 1645 /* Done! */
1699 g_cpucache_up = FULL; 1646 slab_state = FULL;
1700 1647
1701 /* 1648 /*
1702 * Register a cpu startup notifier callback that initializes 1649 * Register a cpu startup notifier callback that initializes
@@ -1727,6 +1674,9 @@ static int __init cpucache_init(void)
1727 */ 1674 */
1728 for_each_online_cpu(cpu) 1675 for_each_online_cpu(cpu)
1729 start_cpu_timer(cpu); 1676 start_cpu_timer(cpu);
1677
1678 /* Done! */
1679 slab_state = FULL;
1730 return 0; 1680 return 0;
1731} 1681}
1732__initcall(cpucache_init); 1682__initcall(cpucache_init);
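Taken together with the earlier removal of g_cpucache_up, this hunk reworks the boot-state hand-off: kmem_cache_init() now ends in UP, kmem_cache_init_late() re-asserts UP before resizing the head arrays and then sets FULL, and the cpucache_init() initcall sets FULL again once the reap timers are started ("slab: move FULL state transition to an initcall"). As an annotated summary of the new side of the hunk:

	/*
	 * Boot-state transitions as they read after this hunk (DOWN and the
	 * PARTIAL_* states are handled earlier, in setup_cpu_cache(), shown
	 * in a later hunk):
	 *
	 *   kmem_cache_init()          slab_state = UP;    (was g_cpucache_up = EARLY)
	 *   kmem_cache_init_late()     slab_state = UP;    (was LATE)
	 *                              ...resize head arrays...
	 *                              slab_state = FULL;
	 *   cpucache_init() initcall   slab_state = FULL;  (newly added here)
	 */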
@@ -1743,7 +1693,7 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1743 "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n", 1693 "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n",
1744 nodeid, gfpflags); 1694 nodeid, gfpflags);
1745 printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n", 1695 printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n",
1746 cachep->name, cachep->buffer_size, cachep->gfporder); 1696 cachep->name, cachep->size, cachep->gfporder);
1747 1697
1748 for_each_online_node(node) { 1698 for_each_online_node(node) {
1749 unsigned long active_objs = 0, num_objs = 0, free_objects = 0; 1699 unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
@@ -1798,7 +1748,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1798 flags |= __GFP_COMP; 1748 flags |= __GFP_COMP;
1799#endif 1749#endif
1800 1750
1801 flags |= cachep->gfpflags; 1751 flags |= cachep->allocflags;
1802 if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 1752 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1803 flags |= __GFP_RECLAIMABLE; 1753 flags |= __GFP_RECLAIMABLE;
1804 1754
@@ -1874,7 +1824,7 @@ static void kmem_rcu_free(struct rcu_head *head)
1874static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, 1824static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
1875 unsigned long caller) 1825 unsigned long caller)
1876{ 1826{
1877 int size = obj_size(cachep); 1827 int size = cachep->object_size;
1878 1828
1879 addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; 1829 addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
1880 1830
@@ -1906,7 +1856,7 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
1906 1856
1907static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) 1857static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1908{ 1858{
1909 int size = obj_size(cachep); 1859 int size = cachep->object_size;
1910 addr = &((char *)addr)[obj_offset(cachep)]; 1860 addr = &((char *)addr)[obj_offset(cachep)];
1911 1861
1912 memset(addr, val, size); 1862 memset(addr, val, size);
@@ -1966,7 +1916,7 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1966 printk("\n"); 1916 printk("\n");
1967 } 1917 }
1968 realobj = (char *)objp + obj_offset(cachep); 1918 realobj = (char *)objp + obj_offset(cachep);
1969 size = obj_size(cachep); 1919 size = cachep->object_size;
1970 for (i = 0; i < size && lines; i += 16, lines--) { 1920 for (i = 0; i < size && lines; i += 16, lines--) {
1971 int limit; 1921 int limit;
1972 limit = 16; 1922 limit = 16;
@@ -1983,7 +1933,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1983 int lines = 0; 1933 int lines = 0;
1984 1934
1985 realobj = (char *)objp + obj_offset(cachep); 1935 realobj = (char *)objp + obj_offset(cachep);
1986 size = obj_size(cachep); 1936 size = cachep->object_size;
1987 1937
1988 for (i = 0; i < size; i++) { 1938 for (i = 0; i < size; i++) {
1989 char exp = POISON_FREE; 1939 char exp = POISON_FREE;
@@ -2047,10 +1997,10 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
2047 1997
2048 if (cachep->flags & SLAB_POISON) { 1998 if (cachep->flags & SLAB_POISON) {
2049#ifdef CONFIG_DEBUG_PAGEALLOC 1999#ifdef CONFIG_DEBUG_PAGEALLOC
2050 if (cachep->buffer_size % PAGE_SIZE == 0 && 2000 if (cachep->size % PAGE_SIZE == 0 &&
2051 OFF_SLAB(cachep)) 2001 OFF_SLAB(cachep))
2052 kernel_map_pages(virt_to_page(objp), 2002 kernel_map_pages(virt_to_page(objp),
2053 cachep->buffer_size / PAGE_SIZE, 1); 2003 cachep->size / PAGE_SIZE, 1);
2054 else 2004 else
2055 check_poison_obj(cachep, objp); 2005 check_poison_obj(cachep, objp);
2056#else 2006#else
@@ -2194,10 +2144,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
2194 2144
2195static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) 2145static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2196{ 2146{
2197 if (g_cpucache_up == FULL) 2147 if (slab_state >= FULL)
2198 return enable_cpucache(cachep, gfp); 2148 return enable_cpucache(cachep, gfp);
2199 2149
2200 if (g_cpucache_up == NONE) { 2150 if (slab_state == DOWN) {
2201 /* 2151 /*
2202 * Note: the first kmem_cache_create must create the cache 2152 * Note: the first kmem_cache_create must create the cache
2203 * that's used by kmalloc(24), otherwise the creation of 2153 * that's used by kmalloc(24), otherwise the creation of
@@ -2212,16 +2162,16 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2212 */ 2162 */
2213 set_up_list3s(cachep, SIZE_AC); 2163 set_up_list3s(cachep, SIZE_AC);
2214 if (INDEX_AC == INDEX_L3) 2164 if (INDEX_AC == INDEX_L3)
2215 g_cpucache_up = PARTIAL_L3; 2165 slab_state = PARTIAL_L3;
2216 else 2166 else
2217 g_cpucache_up = PARTIAL_AC; 2167 slab_state = PARTIAL_ARRAYCACHE;
2218 } else { 2168 } else {
2219 cachep->array[smp_processor_id()] = 2169 cachep->array[smp_processor_id()] =
2220 kmalloc(sizeof(struct arraycache_init), gfp); 2170 kmalloc(sizeof(struct arraycache_init), gfp);
2221 2171
2222 if (g_cpucache_up == PARTIAL_AC) { 2172 if (slab_state == PARTIAL_ARRAYCACHE) {
2223 set_up_list3s(cachep, SIZE_L3); 2173 set_up_list3s(cachep, SIZE_L3);
2224 g_cpucache_up = PARTIAL_L3; 2174 slab_state = PARTIAL_L3;
2225 } else { 2175 } else {
2226 int node; 2176 int node;
2227 for_each_online_node(node) { 2177 for_each_online_node(node) {
@@ -2247,7 +2197,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2247} 2197}
2248 2198
2249/** 2199/**
2250 * kmem_cache_create - Create a cache. 2200 * __kmem_cache_create - Create a cache.
2251 * @name: A string which is used in /proc/slabinfo to identify this cache. 2201 * @name: A string which is used in /proc/slabinfo to identify this cache.
2252 * @size: The size of objects to be created in this cache. 2202 * @size: The size of objects to be created in this cache.
2253 * @align: The required alignment for the objects. 2203 * @align: The required alignment for the objects.
@@ -2274,59 +2224,14 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2274 * as davem. 2224 * as davem.
2275 */ 2225 */
2276struct kmem_cache * 2226struct kmem_cache *
2277kmem_cache_create (const char *name, size_t size, size_t align, 2227__kmem_cache_create (const char *name, size_t size, size_t align,
2278 unsigned long flags, void (*ctor)(void *)) 2228 unsigned long flags, void (*ctor)(void *))
2279{ 2229{
2280 size_t left_over, slab_size, ralign; 2230 size_t left_over, slab_size, ralign;
2281 struct kmem_cache *cachep = NULL, *pc; 2231 struct kmem_cache *cachep = NULL;
2282 gfp_t gfp; 2232 gfp_t gfp;
2283 2233
2284 /*
2285 * Sanity checks... these are all serious usage bugs.
2286 */
2287 if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
2288 size > KMALLOC_MAX_SIZE) {
2289 printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
2290 name);
2291 BUG();
2292 }
2293
2294 /*
2295 * We use cache_chain_mutex to ensure a consistent view of
2296 * cpu_online_mask as well. Please see cpuup_callback
2297 */
2298 if (slab_is_available()) {
2299 get_online_cpus();
2300 mutex_lock(&cache_chain_mutex);
2301 }
2302
2303 list_for_each_entry(pc, &cache_chain, next) {
2304 char tmp;
2305 int res;
2306
2307 /*
2308 * This happens when the module gets unloaded and doesn't
2309 * destroy its slab cache and no-one else reuses the vmalloc
2310 * area of the module. Print a warning.
2311 */
2312 res = probe_kernel_address(pc->name, tmp);
2313 if (res) {
2314 printk(KERN_ERR
2315 "SLAB: cache with size %d has lost its name\n",
2316 pc->buffer_size);
2317 continue;
2318 }
2319
2320 if (!strcmp(pc->name, name)) {
2321 printk(KERN_ERR
2322 "kmem_cache_create: duplicate cache %s\n", name);
2323 dump_stack();
2324 goto oops;
2325 }
2326 }
2327
2328#if DEBUG 2234#if DEBUG
2329 WARN_ON(strchr(name, ' ')); /* It confuses parsers */
2330#if FORCED_DEBUG 2235#if FORCED_DEBUG
2331 /* 2236 /*
2332 * Enable redzoning and last user accounting, except for caches with 2237 * Enable redzoning and last user accounting, except for caches with
@@ -2415,11 +2320,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2415 /* Get cache's description obj. */ 2320 /* Get cache's description obj. */
2416 cachep = kmem_cache_zalloc(&cache_cache, gfp); 2321 cachep = kmem_cache_zalloc(&cache_cache, gfp);
2417 if (!cachep) 2322 if (!cachep)
2418 goto oops; 2323 return NULL;
2419 2324
2420 cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; 2325 cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
2326 cachep->object_size = size;
2327 cachep->align = align;
2421#if DEBUG 2328#if DEBUG
2422 cachep->obj_size = size;
2423 2329
2424 /* 2330 /*
2425 * Both debugging options require word-alignment which is calculated 2331 * Both debugging options require word-alignment which is calculated
@@ -2442,7 +2348,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2442 } 2348 }
2443#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) 2349#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
2444 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size 2350 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
2445 && cachep->obj_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) { 2351 && cachep->object_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) {
2446 cachep->obj_offset += PAGE_SIZE - ALIGN(size, align); 2352 cachep->obj_offset += PAGE_SIZE - ALIGN(size, align);
2447 size = PAGE_SIZE; 2353 size = PAGE_SIZE;
2448 } 2354 }
@@ -2471,8 +2377,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2471 printk(KERN_ERR 2377 printk(KERN_ERR
2472 "kmem_cache_create: couldn't create cache %s.\n", name); 2378 "kmem_cache_create: couldn't create cache %s.\n", name);
2473 kmem_cache_free(&cache_cache, cachep); 2379 kmem_cache_free(&cache_cache, cachep);
2474 cachep = NULL; 2380 return NULL;
2475 goto oops;
2476 } 2381 }
2477 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) 2382 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
2478 + sizeof(struct slab), align); 2383 + sizeof(struct slab), align);
@@ -2508,10 +2413,10 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2508 cachep->colour = left_over / cachep->colour_off; 2413 cachep->colour = left_over / cachep->colour_off;
2509 cachep->slab_size = slab_size; 2414 cachep->slab_size = slab_size;
2510 cachep->flags = flags; 2415 cachep->flags = flags;
2511 cachep->gfpflags = 0; 2416 cachep->allocflags = 0;
2512 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) 2417 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2513 cachep->gfpflags |= GFP_DMA; 2418 cachep->allocflags |= GFP_DMA;
2514 cachep->buffer_size = size; 2419 cachep->size = size;
2515 cachep->reciprocal_buffer_size = reciprocal_value(size); 2420 cachep->reciprocal_buffer_size = reciprocal_value(size);
2516 2421
2517 if (flags & CFLGS_OFF_SLAB) { 2422 if (flags & CFLGS_OFF_SLAB) {
@@ -2530,8 +2435,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2530 2435
2531 if (setup_cpu_cache(cachep, gfp)) { 2436 if (setup_cpu_cache(cachep, gfp)) {
2532 __kmem_cache_destroy(cachep); 2437 __kmem_cache_destroy(cachep);
2533 cachep = NULL; 2438 return NULL;
2534 goto oops;
2535 } 2439 }
2536 2440
2537 if (flags & SLAB_DEBUG_OBJECTS) { 2441 if (flags & SLAB_DEBUG_OBJECTS) {
@@ -2545,18 +2449,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2545 } 2449 }
2546 2450
2547 /* cache setup completed, link it into the list */ 2451 /* cache setup completed, link it into the list */
2548 list_add(&cachep->next, &cache_chain); 2452 list_add(&cachep->list, &slab_caches);
2549oops:
2550 if (!cachep && (flags & SLAB_PANIC))
2551 panic("kmem_cache_create(): failed to create slab `%s'\n",
2552 name);
2553 if (slab_is_available()) {
2554 mutex_unlock(&cache_chain_mutex);
2555 put_online_cpus();
2556 }
2557 return cachep; 2453 return cachep;
2558} 2454}
2559EXPORT_SYMBOL(kmem_cache_create);
2560 2455
2561#if DEBUG 2456#if DEBUG
2562static void check_irq_off(void) 2457static void check_irq_off(void)
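With this hunk the SLAB-side creation path stops taking slab_mutex, scanning for duplicate cache names, and honouring SLAB_PANIC on failure; per "mm, sl[aou]b: Extract common code for kmem_cache_create()" and "mm, sl[aou]b: Move kmem_cache_create mutex handling to common code", those duties moved into an allocator-independent kmem_cache_create() in the new mm/slab_common.c, which is not shown in this excerpt. A rough sketch of such a wrapper, inferred only from the code removed above and the commit titles (the exact checks and messages are assumptions):

	struct kmem_cache *kmem_cache_create(const char *name, size_t size,
			size_t align, unsigned long flags, void (*ctor)(void *))
	{
		struct kmem_cache *s = NULL;

		get_online_cpus();
		mutex_lock(&slab_mutex);

		/* the sanity checks and duplicate-name scan formerly done in
		 * slab.c would sit here, under slab_mutex */

		s = __kmem_cache_create(name, size, align, flags, ctor);

		mutex_unlock(&slab_mutex);
		put_online_cpus();

		if (!s && (flags & SLAB_PANIC))
			panic("kmem_cache_create: Failed to create slab '%s'\n", name);

		return s;
	}
	EXPORT_SYMBOL(kmem_cache_create);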
@@ -2671,7 +2566,7 @@ out:
2671 return nr_freed; 2566 return nr_freed;
2672} 2567}
2673 2568
2674/* Called with cache_chain_mutex held to protect against cpu hotplug */ 2569/* Called with slab_mutex held to protect against cpu hotplug */
2675static int __cache_shrink(struct kmem_cache *cachep) 2570static int __cache_shrink(struct kmem_cache *cachep)
2676{ 2571{
2677 int ret = 0, i = 0; 2572 int ret = 0, i = 0;
@@ -2706,9 +2601,9 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
2706 BUG_ON(!cachep || in_interrupt()); 2601 BUG_ON(!cachep || in_interrupt());
2707 2602
2708 get_online_cpus(); 2603 get_online_cpus();
2709 mutex_lock(&cache_chain_mutex); 2604 mutex_lock(&slab_mutex);
2710 ret = __cache_shrink(cachep); 2605 ret = __cache_shrink(cachep);
2711 mutex_unlock(&cache_chain_mutex); 2606 mutex_unlock(&slab_mutex);
2712 put_online_cpus(); 2607 put_online_cpus();
2713 return ret; 2608 return ret;
2714} 2609}
@@ -2736,15 +2631,15 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
2736 2631
2737 /* Find the cache in the chain of caches. */ 2632 /* Find the cache in the chain of caches. */
2738 get_online_cpus(); 2633 get_online_cpus();
2739 mutex_lock(&cache_chain_mutex); 2634 mutex_lock(&slab_mutex);
2740 /* 2635 /*
2741 * the chain is never empty, cache_cache is never destroyed 2636 * the chain is never empty, cache_cache is never destroyed
2742 */ 2637 */
2743 list_del(&cachep->next); 2638 list_del(&cachep->list);
2744 if (__cache_shrink(cachep)) { 2639 if (__cache_shrink(cachep)) {
2745 slab_error(cachep, "Can't free all objects"); 2640 slab_error(cachep, "Can't free all objects");
2746 list_add(&cachep->next, &cache_chain); 2641 list_add(&cachep->list, &slab_caches);
2747 mutex_unlock(&cache_chain_mutex); 2642 mutex_unlock(&slab_mutex);
2748 put_online_cpus(); 2643 put_online_cpus();
2749 return; 2644 return;
2750 } 2645 }
@@ -2753,7 +2648,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
2753 rcu_barrier(); 2648 rcu_barrier();
2754 2649
2755 __kmem_cache_destroy(cachep); 2650 __kmem_cache_destroy(cachep);
2756 mutex_unlock(&cache_chain_mutex); 2651 mutex_unlock(&slab_mutex);
2757 put_online_cpus(); 2652 put_online_cpus();
2758} 2653}
2759EXPORT_SYMBOL(kmem_cache_destroy); 2654EXPORT_SYMBOL(kmem_cache_destroy);
@@ -2840,10 +2735,10 @@ static void cache_init_objs(struct kmem_cache *cachep,
2840 slab_error(cachep, "constructor overwrote the" 2735 slab_error(cachep, "constructor overwrote the"
2841 " start of an object"); 2736 " start of an object");
2842 } 2737 }
2843 if ((cachep->buffer_size % PAGE_SIZE) == 0 && 2738 if ((cachep->size % PAGE_SIZE) == 0 &&
2844 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) 2739 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2845 kernel_map_pages(virt_to_page(objp), 2740 kernel_map_pages(virt_to_page(objp),
2846 cachep->buffer_size / PAGE_SIZE, 0); 2741 cachep->size / PAGE_SIZE, 0);
2847#else 2742#else
2848 if (cachep->ctor) 2743 if (cachep->ctor)
2849 cachep->ctor(objp); 2744 cachep->ctor(objp);
@@ -2857,9 +2752,9 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2857{ 2752{
2858 if (CONFIG_ZONE_DMA_FLAG) { 2753 if (CONFIG_ZONE_DMA_FLAG) {
2859 if (flags & GFP_DMA) 2754 if (flags & GFP_DMA)
2860 BUG_ON(!(cachep->gfpflags & GFP_DMA)); 2755 BUG_ON(!(cachep->allocflags & GFP_DMA));
2861 else 2756 else
2862 BUG_ON(cachep->gfpflags & GFP_DMA); 2757 BUG_ON(cachep->allocflags & GFP_DMA);
2863 } 2758 }
2864} 2759}
2865 2760
@@ -2918,8 +2813,8 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2918 nr_pages <<= cache->gfporder; 2813 nr_pages <<= cache->gfporder;
2919 2814
2920 do { 2815 do {
2921 page_set_cache(page, cache); 2816 page->slab_cache = cache;
2922 page_set_slab(page, slab); 2817 page->slab_page = slab;
2923 page++; 2818 page++;
2924 } while (--nr_pages); 2819 } while (--nr_pages);
2925} 2820}
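Together with the earlier replacement of page_get_cache()/page_get_slab(), this hunk completes "slab: Use page struct fields instead of casting": the owning cache and slab are now stored in the dedicated page->slab_cache and page->slab_page fields instead of being smuggled through page->lru.next/lru.prev. Condensed restatement of the change (the helper names below are illustrative, not from the patch):

	/* At slab creation (slab_map_pages() above): every page of the slab
	 * records its owners directly in struct page. */
	static void sketch_remember_owners(struct page *page,
					   struct kmem_cache *cache,
					   struct slab *slab)
	{
		page->slab_cache = cache;
		page->slab_page  = slab;
	}

	/* At lookup/free time (virt_to_cache()/virt_to_slab() in earlier
	 * hunks): read the fields back, no casting required. */
	static struct kmem_cache *sketch_lookup_cache(const void *obj)
	{
		return virt_to_head_page(obj)->slab_cache;
	}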
@@ -3057,7 +2952,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
3057 kfree_debugcheck(objp); 2952 kfree_debugcheck(objp);
3058 page = virt_to_head_page(objp); 2953 page = virt_to_head_page(objp);
3059 2954
3060 slabp = page_get_slab(page); 2955 slabp = page->slab_page;
3061 2956
3062 if (cachep->flags & SLAB_RED_ZONE) { 2957 if (cachep->flags & SLAB_RED_ZONE) {
3063 verify_redzone_free(cachep, objp); 2958 verify_redzone_free(cachep, objp);
@@ -3077,10 +2972,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
3077#endif 2972#endif
3078 if (cachep->flags & SLAB_POISON) { 2973 if (cachep->flags & SLAB_POISON) {
3079#ifdef CONFIG_DEBUG_PAGEALLOC 2974#ifdef CONFIG_DEBUG_PAGEALLOC
3080 if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { 2975 if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
3081 store_stackinfo(cachep, objp, (unsigned long)caller); 2976 store_stackinfo(cachep, objp, (unsigned long)caller);
3082 kernel_map_pages(virt_to_page(objp), 2977 kernel_map_pages(virt_to_page(objp),
3083 cachep->buffer_size / PAGE_SIZE, 0); 2978 cachep->size / PAGE_SIZE, 0);
3084 } else { 2979 } else {
3085 poison_obj(cachep, objp, POISON_FREE); 2980 poison_obj(cachep, objp, POISON_FREE);
3086 } 2981 }
@@ -3230,9 +3125,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3230 return objp; 3125 return objp;
3231 if (cachep->flags & SLAB_POISON) { 3126 if (cachep->flags & SLAB_POISON) {
3232#ifdef CONFIG_DEBUG_PAGEALLOC 3127#ifdef CONFIG_DEBUG_PAGEALLOC
3233 if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) 3128 if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
3234 kernel_map_pages(virt_to_page(objp), 3129 kernel_map_pages(virt_to_page(objp),
3235 cachep->buffer_size / PAGE_SIZE, 1); 3130 cachep->size / PAGE_SIZE, 1);
3236 else 3131 else
3237 check_poison_obj(cachep, objp); 3132 check_poison_obj(cachep, objp);
3238#else 3133#else
@@ -3261,8 +3156,8 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3261 struct slab *slabp; 3156 struct slab *slabp;
3262 unsigned objnr; 3157 unsigned objnr;
3263 3158
3264 slabp = page_get_slab(virt_to_head_page(objp)); 3159 slabp = virt_to_head_page(objp)->slab_page;
3265 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; 3160 objnr = (unsigned)(objp - slabp->s_mem) / cachep->size;
3266 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; 3161 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
3267 } 3162 }
3268#endif 3163#endif
@@ -3285,7 +3180,7 @@ static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3285 if (cachep == &cache_cache) 3180 if (cachep == &cache_cache)
3286 return false; 3181 return false;
3287 3182
3288 return should_failslab(obj_size(cachep), flags, cachep->flags); 3183 return should_failslab(cachep->object_size, flags, cachep->flags);
3289} 3184}
3290 3185
3291static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) 3186static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
@@ -3336,7 +3231,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3336 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) 3231 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3337 nid_alloc = cpuset_slab_spread_node(); 3232 nid_alloc = cpuset_slab_spread_node();
3338 else if (current->mempolicy) 3233 else if (current->mempolicy)
3339 nid_alloc = slab_node(current->mempolicy); 3234 nid_alloc = slab_node();
3340 if (nid_alloc != nid_here) 3235 if (nid_alloc != nid_here)
3341 return ____cache_alloc_node(cachep, flags, nid_alloc); 3236 return ____cache_alloc_node(cachep, flags, nid_alloc);
3342 return NULL; 3237 return NULL;
@@ -3368,7 +3263,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3368 3263
3369retry_cpuset: 3264retry_cpuset:
3370 cpuset_mems_cookie = get_mems_allowed(); 3265 cpuset_mems_cookie = get_mems_allowed();
3371 zonelist = node_zonelist(slab_node(current->mempolicy), flags); 3266 zonelist = node_zonelist(slab_node(), flags);
3372 3267
3373retry: 3268retry:
3374 /* 3269 /*
@@ -3545,14 +3440,14 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3545 out: 3440 out:
3546 local_irq_restore(save_flags); 3441 local_irq_restore(save_flags);
3547 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); 3442 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3548 kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, 3443 kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
3549 flags); 3444 flags);
3550 3445
3551 if (likely(ptr)) 3446 if (likely(ptr))
3552 kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep)); 3447 kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size);
3553 3448
3554 if (unlikely((flags & __GFP_ZERO) && ptr)) 3449 if (unlikely((flags & __GFP_ZERO) && ptr))
3555 memset(ptr, 0, obj_size(cachep)); 3450 memset(ptr, 0, cachep->object_size);
3556 3451
3557 return ptr; 3452 return ptr;
3558} 3453}
@@ -3607,15 +3502,15 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3607 objp = __do_cache_alloc(cachep, flags); 3502 objp = __do_cache_alloc(cachep, flags);
3608 local_irq_restore(save_flags); 3503 local_irq_restore(save_flags);
3609 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); 3504 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3610 kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, 3505 kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
3611 flags); 3506 flags);
3612 prefetchw(objp); 3507 prefetchw(objp);
3613 3508
3614 if (likely(objp)) 3509 if (likely(objp))
3615 kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep)); 3510 kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size);
3616 3511
3617 if (unlikely((flags & __GFP_ZERO) && objp)) 3512 if (unlikely((flags & __GFP_ZERO) && objp))
3618 memset(objp, 0, obj_size(cachep)); 3513 memset(objp, 0, cachep->object_size);
3619 3514
3620 return objp; 3515 return objp;
3621} 3516}
@@ -3731,7 +3626,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
3731 kmemleak_free_recursive(objp, cachep->flags); 3626 kmemleak_free_recursive(objp, cachep->flags);
3732 objp = cache_free_debugcheck(cachep, objp, caller); 3627 objp = cache_free_debugcheck(cachep, objp, caller);
3733 3628
3734 kmemcheck_slab_free(cachep, objp, obj_size(cachep)); 3629 kmemcheck_slab_free(cachep, objp, cachep->object_size);
3735 3630
3736 /* 3631 /*
3737 * Skip calling cache_free_alien() when the platform is not numa. 3632 * Skip calling cache_free_alien() when the platform is not numa.
@@ -3766,7 +3661,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3766 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0)); 3661 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3767 3662
3768 trace_kmem_cache_alloc(_RET_IP_, ret, 3663 trace_kmem_cache_alloc(_RET_IP_, ret,
3769 obj_size(cachep), cachep->buffer_size, flags); 3664 cachep->object_size, cachep->size, flags);
3770 3665
3771 return ret; 3666 return ret;
3772} 3667}
@@ -3794,7 +3689,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3794 __builtin_return_address(0)); 3689 __builtin_return_address(0));
3795 3690
3796 trace_kmem_cache_alloc_node(_RET_IP_, ret, 3691 trace_kmem_cache_alloc_node(_RET_IP_, ret,
3797 obj_size(cachep), cachep->buffer_size, 3692 cachep->object_size, cachep->size,
3798 flags, nodeid); 3693 flags, nodeid);
3799 3694
3800 return ret; 3695 return ret;
@@ -3876,7 +3771,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3876 ret = __cache_alloc(cachep, flags, caller); 3771 ret = __cache_alloc(cachep, flags, caller);
3877 3772
3878 trace_kmalloc((unsigned long) caller, ret, 3773 trace_kmalloc((unsigned long) caller, ret,
3879 size, cachep->buffer_size, flags); 3774 size, cachep->size, flags);
3880 3775
3881 return ret; 3776 return ret;
3882} 3777}
@@ -3916,9 +3811,9 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3916 unsigned long flags; 3811 unsigned long flags;
3917 3812
3918 local_irq_save(flags); 3813 local_irq_save(flags);
3919 debug_check_no_locks_freed(objp, obj_size(cachep)); 3814 debug_check_no_locks_freed(objp, cachep->object_size);
3920 if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) 3815 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3921 debug_check_no_obj_freed(objp, obj_size(cachep)); 3816 debug_check_no_obj_freed(objp, cachep->object_size);
3922 __cache_free(cachep, objp, __builtin_return_address(0)); 3817 __cache_free(cachep, objp, __builtin_return_address(0));
3923 local_irq_restore(flags); 3818 local_irq_restore(flags);
3924 3819
@@ -3947,8 +3842,9 @@ void kfree(const void *objp)
3947 local_irq_save(flags); 3842 local_irq_save(flags);
3948 kfree_debugcheck(objp); 3843 kfree_debugcheck(objp);
3949 c = virt_to_cache(objp); 3844 c = virt_to_cache(objp);
3950 debug_check_no_locks_freed(objp, obj_size(c)); 3845 debug_check_no_locks_freed(objp, c->object_size);
3951 debug_check_no_obj_freed(objp, obj_size(c)); 3846
3847 debug_check_no_obj_freed(objp, c->object_size);
3952 __cache_free(c, (void *)objp, __builtin_return_address(0)); 3848 __cache_free(c, (void *)objp, __builtin_return_address(0));
3953 local_irq_restore(flags); 3849 local_irq_restore(flags);
3954} 3850}
@@ -3956,7 +3852,7 @@ EXPORT_SYMBOL(kfree);
3956 3852
3957unsigned int kmem_cache_size(struct kmem_cache *cachep) 3853unsigned int kmem_cache_size(struct kmem_cache *cachep)
3958{ 3854{
3959 return obj_size(cachep); 3855 return cachep->object_size;
3960} 3856}
3961EXPORT_SYMBOL(kmem_cache_size); 3857EXPORT_SYMBOL(kmem_cache_size);
3962 3858
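kmem_cache_size() now reports the common object_size field rather than going through the deleted obj_size() accessor. The same renaming runs through all the hunks above: the old buffer_size becomes size, and the old obj_size becomes object_size. As a reading aid (not code from the patch):

	/*
	 * Field naming used by the common struct kmem_cache fields in this diff:
	 *
	 *   object_size -- the size the caller asked for (old SLAB: obj_size)
	 *   size        -- the per-object stride actually used, i.e. object_size
	 *                  plus alignment padding and, with SLAB_RED_ZONE /
	 *                  SLAB_STORE_USER, the debug words described in the
	 *                  layout comment near the top of the slab.c diff.
	 */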
@@ -4030,7 +3926,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
4030 return 0; 3926 return 0;
4031 3927
4032fail: 3928fail:
4033 if (!cachep->next.next) { 3929 if (!cachep->list.next) {
4034 /* Cache is not active yet. Roll back what we did */ 3930 /* Cache is not active yet. Roll back what we did */
4035 node--; 3931 node--;
4036 while (node >= 0) { 3932 while (node >= 0) {
@@ -4065,7 +3961,7 @@ static void do_ccupdate_local(void *info)
4065 new->new[smp_processor_id()] = old; 3961 new->new[smp_processor_id()] = old;
4066} 3962}
4067 3963
4068/* Always called with the cache_chain_mutex held */ 3964/* Always called with the slab_mutex held */
4069static int do_tune_cpucache(struct kmem_cache *cachep, int limit, 3965static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
4070 int batchcount, int shared, gfp_t gfp) 3966 int batchcount, int shared, gfp_t gfp)
4071{ 3967{
@@ -4109,7 +4005,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
4109 return alloc_kmemlist(cachep, gfp); 4005 return alloc_kmemlist(cachep, gfp);
4110} 4006}
4111 4007
4112/* Called with cache_chain_mutex held always */ 4008/* Called with slab_mutex held always */
4113static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) 4009static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
4114{ 4010{
4115 int err; 4011 int err;
@@ -4124,13 +4020,13 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
4124 * The numbers are guessed, we should auto-tune as described by 4020 * The numbers are guessed, we should auto-tune as described by
4125 * Bonwick. 4021 * Bonwick.
4126 */ 4022 */
4127 if (cachep->buffer_size > 131072) 4023 if (cachep->size > 131072)
4128 limit = 1; 4024 limit = 1;
4129 else if (cachep->buffer_size > PAGE_SIZE) 4025 else if (cachep->size > PAGE_SIZE)
4130 limit = 8; 4026 limit = 8;
4131 else if (cachep->buffer_size > 1024) 4027 else if (cachep->size > 1024)
4132 limit = 24; 4028 limit = 24;
4133 else if (cachep->buffer_size > 256) 4029 else if (cachep->size > 256)
4134 limit = 54; 4030 limit = 54;
4135 else 4031 else
4136 limit = 120; 4032 limit = 120;
@@ -4145,7 +4041,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
4145 * to a larger limit. Thus disabled by default. 4041 * to a larger limit. Thus disabled by default.
4146 */ 4042 */
4147 shared = 0; 4043 shared = 0;
4148 if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1) 4044 if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
4149 shared = 8; 4045 shared = 8;
4150 4046
4151#if DEBUG 4047#if DEBUG
@@ -4211,11 +4107,11 @@ static void cache_reap(struct work_struct *w)
4211 int node = numa_mem_id(); 4107 int node = numa_mem_id();
4212 struct delayed_work *work = to_delayed_work(w); 4108 struct delayed_work *work = to_delayed_work(w);
4213 4109
4214 if (!mutex_trylock(&cache_chain_mutex)) 4110 if (!mutex_trylock(&slab_mutex))
4215 /* Give up. Setup the next iteration. */ 4111 /* Give up. Setup the next iteration. */
4216 goto out; 4112 goto out;
4217 4113
4218 list_for_each_entry(searchp, &cache_chain, next) { 4114 list_for_each_entry(searchp, &slab_caches, list) {
4219 check_irq_on(); 4115 check_irq_on();
4220 4116
4221 /* 4117 /*
@@ -4253,7 +4149,7 @@ next:
4253 cond_resched(); 4149 cond_resched();
4254 } 4150 }
4255 check_irq_on(); 4151 check_irq_on();
4256 mutex_unlock(&cache_chain_mutex); 4152 mutex_unlock(&slab_mutex);
4257 next_reap_node(); 4153 next_reap_node();
4258out: 4154out:
4259 /* Set up the next iteration */ 4155 /* Set up the next iteration */
@@ -4289,26 +4185,26 @@ static void *s_start(struct seq_file *m, loff_t *pos)
4289{ 4185{
4290 loff_t n = *pos; 4186 loff_t n = *pos;
4291 4187
4292 mutex_lock(&cache_chain_mutex); 4188 mutex_lock(&slab_mutex);
4293 if (!n) 4189 if (!n)
4294 print_slabinfo_header(m); 4190 print_slabinfo_header(m);
4295 4191
4296 return seq_list_start(&cache_chain, *pos); 4192 return seq_list_start(&slab_caches, *pos);
4297} 4193}
4298 4194
4299static void *s_next(struct seq_file *m, void *p, loff_t *pos) 4195static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4300{ 4196{
4301 return seq_list_next(p, &cache_chain, pos); 4197 return seq_list_next(p, &slab_caches, pos);
4302} 4198}
4303 4199
4304static void s_stop(struct seq_file *m, void *p) 4200static void s_stop(struct seq_file *m, void *p)
4305{ 4201{
4306 mutex_unlock(&cache_chain_mutex); 4202 mutex_unlock(&slab_mutex);
4307} 4203}
4308 4204
4309static int s_show(struct seq_file *m, void *p) 4205static int s_show(struct seq_file *m, void *p)
4310{ 4206{
4311 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next); 4207 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
4312 struct slab *slabp; 4208 struct slab *slabp;
4313 unsigned long active_objs; 4209 unsigned long active_objs;
4314 unsigned long num_objs; 4210 unsigned long num_objs;
@@ -4364,7 +4260,7 @@ static int s_show(struct seq_file *m, void *p)
4364 printk(KERN_ERR "slab: cache %s error: %s\n", name, error); 4260 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4365 4261
4366 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", 4262 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4367 name, active_objs, num_objs, cachep->buffer_size, 4263 name, active_objs, num_objs, cachep->size,
4368 cachep->num, (1 << cachep->gfporder)); 4264 cachep->num, (1 << cachep->gfporder));
4369 seq_printf(m, " : tunables %4u %4u %4u", 4265 seq_printf(m, " : tunables %4u %4u %4u",
4370 cachep->limit, cachep->batchcount, cachep->shared); 4266 cachep->limit, cachep->batchcount, cachep->shared);
@@ -4454,9 +4350,9 @@ static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
4454 return -EINVAL; 4350 return -EINVAL;
4455 4351
4456 /* Find the cache in the chain of caches. */ 4352 /* Find the cache in the chain of caches. */
4457 mutex_lock(&cache_chain_mutex); 4353 mutex_lock(&slab_mutex);
4458 res = -EINVAL; 4354 res = -EINVAL;
4459 list_for_each_entry(cachep, &cache_chain, next) { 4355 list_for_each_entry(cachep, &slab_caches, list) {
4460 if (!strcmp(cachep->name, kbuf)) { 4356 if (!strcmp(cachep->name, kbuf)) {
4461 if (limit < 1 || batchcount < 1 || 4357 if (limit < 1 || batchcount < 1 ||
4462 batchcount > limit || shared < 0) { 4358 batchcount > limit || shared < 0) {
@@ -4469,7 +4365,7 @@ static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
4469 break; 4365 break;
4470 } 4366 }
4471 } 4367 }
4472 mutex_unlock(&cache_chain_mutex); 4368 mutex_unlock(&slab_mutex);
4473 if (res >= 0) 4369 if (res >= 0)
4474 res = count; 4370 res = count;
4475 return res; 4371 return res;
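
slabinfo_write() above is SLAB's tuning interface: a line written to /proc/slabinfo names a cache and supplies new limit/batchcount/shared values, which are rejected unless limit >= batchcount >= 1 and shared >= 0. A minimal userspace sketch of a caller, assuming a SLAB-configured kernel, root privileges, and using "dentry" purely as an example cache name:

/* Sketch: tune a SLAB cache via /proc/slabinfo ("name limit batchcount shared"). */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	FILE *f = fopen("/proc/slabinfo", "w");

	if (!f) {
		perror("fopen /proc/slabinfo");
		return EXIT_FAILURE;
	}
	/* limit >= batchcount >= 1 and shared >= 0, mirroring the checks above. */
	if (fprintf(f, "dentry 120 60 8\n") < 0)
		perror("write tunables");
	fclose(f);
	return 0;
}
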
@@ -4492,8 +4388,8 @@ static const struct file_operations proc_slabinfo_operations = {
4492 4388
4493static void *leaks_start(struct seq_file *m, loff_t *pos) 4389static void *leaks_start(struct seq_file *m, loff_t *pos)
4494{ 4390{
4495 mutex_lock(&cache_chain_mutex); 4391 mutex_lock(&slab_mutex);
4496 return seq_list_start(&cache_chain, *pos); 4392 return seq_list_start(&slab_caches, *pos);
4497} 4393}
4498 4394
4499static inline int add_caller(unsigned long *n, unsigned long v) 4395static inline int add_caller(unsigned long *n, unsigned long v)
@@ -4532,7 +4428,7 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4532 int i; 4428 int i;
4533 if (n[0] == n[1]) 4429 if (n[0] == n[1])
4534 return; 4430 return;
4535 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) { 4431 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) {
4536 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) 4432 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4537 continue; 4433 continue;
4538 if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) 4434 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
@@ -4558,7 +4454,7 @@ static void show_symbol(struct seq_file *m, unsigned long address)
4558 4454
4559static int leaks_show(struct seq_file *m, void *p) 4455static int leaks_show(struct seq_file *m, void *p)
4560{ 4456{
4561 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next); 4457 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
4562 struct slab *slabp; 4458 struct slab *slabp;
4563 struct kmem_list3 *l3; 4459 struct kmem_list3 *l3;
4564 const char *name; 4460 const char *name;
@@ -4592,17 +4488,17 @@ static int leaks_show(struct seq_file *m, void *p)
4592 name = cachep->name; 4488 name = cachep->name;
4593 if (n[0] == n[1]) { 4489 if (n[0] == n[1]) {
4594 /* Increase the buffer size */ 4490 /* Increase the buffer size */
4595 mutex_unlock(&cache_chain_mutex); 4491 mutex_unlock(&slab_mutex);
4596 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL); 4492 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
4597 if (!m->private) { 4493 if (!m->private) {
4598 /* Too bad, we are really out */ 4494 /* Too bad, we are really out */
4599 m->private = n; 4495 m->private = n;
4600 mutex_lock(&cache_chain_mutex); 4496 mutex_lock(&slab_mutex);
4601 return -ENOMEM; 4497 return -ENOMEM;
4602 } 4498 }
4603 *(unsigned long *)m->private = n[0] * 2; 4499 *(unsigned long *)m->private = n[0] * 2;
4604 kfree(n); 4500 kfree(n);
4605 mutex_lock(&cache_chain_mutex); 4501 mutex_lock(&slab_mutex);
4606 /* Now make sure this entry will be retried */ 4502 /* Now make sure this entry will be retried */
4607 m->count = m->size; 4503 m->count = m->size;
4608 return 0; 4504 return 0;
@@ -4677,6 +4573,6 @@ size_t ksize(const void *objp)
4677 if (unlikely(objp == ZERO_SIZE_PTR)) 4573 if (unlikely(objp == ZERO_SIZE_PTR))
4678 return 0; 4574 return 0;
4679 4575
4680 return obj_size(virt_to_cache(objp)); 4576 return virt_to_cache(objp)->object_size;
4681} 4577}
4682EXPORT_SYMBOL(ksize); 4578EXPORT_SYMBOL(ksize);
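
The new ksize() body reads object_size straight from the owning kmem_cache, so the value reported is the usable size of the allocation, which can exceed what the caller asked for. A kernel-style sketch of that contract (not part of this patch):

/* ksize() reports the usable size of a kmalloc() object, which may be
 * larger than the requested size because allocations are rounded up to
 * the backing cache's object size.
 */
#include <linux/kernel.h>
#include <linux/slab.h>

static void ksize_demo(void)
{
	void *p = kmalloc(30, GFP_KERNEL);

	if (!p)
		return;
	/* A 30-byte request is typically backed by the 32-byte kmalloc cache. */
	pr_info("requested 30, usable %zu bytes\n", ksize(p));
	kfree(p);
}
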
diff --git a/mm/slab.h b/mm/slab.h
new file mode 100644
index 000000000000..db7848caaa25
--- /dev/null
+++ b/mm/slab.h
@@ -0,0 +1,33 @@
1#ifndef MM_SLAB_H
2#define MM_SLAB_H
3/*
4 * Internal slab definitions
5 */
6
7/*
8 * State of the slab allocator.
9 *
10 * This is used to describe the states of the allocator during bootup.
11 * Allocators use this to gradually bootstrap themselves. Most allocators
12 * have the problem that the structures used for managing slab caches are
13 * allocated from slab caches themselves.
14 */
15enum slab_state {
16 DOWN, /* No slab functionality yet */
17 PARTIAL, /* SLUB: kmem_cache_node available */
18 PARTIAL_ARRAYCACHE, /* SLAB: kmalloc size for arraycache available */
19 PARTIAL_L3, /* SLAB: kmalloc size for l3 struct available */
20 UP, /* Slab caches usable but not all extras yet */
21 FULL /* Everything is working */
22};
23
24extern enum slab_state slab_state;
25
26/* The slab cache mutex protects the management structures during changes */
27extern struct mutex slab_mutex;
28extern struct list_head slab_caches;
29
30struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
31 size_t align, unsigned long flags, void (*ctor)(void *));
32
33#endif
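
The shared enum above replaces the per-allocator boot flags (SLUB's private slab_state, SLOB's slob_ready): each allocator advances the state as its bootstrap completes, and generic code only needs to test slab_state >= UP, as slab_is_available() in mm/slab_common.c does. A small userspace model of the state machine, for illustration only:

/* Userspace model of the shared slab bootstrap state machine. */
#include <stdio.h>

enum slab_state { DOWN, PARTIAL, PARTIAL_ARRAYCACHE, PARTIAL_L3, UP, FULL };

static enum slab_state slab_state = DOWN;

static int slab_is_available(void)
{
	return slab_state >= UP;	/* same test as mm/slab_common.c */
}

int main(void)
{
	printf("early boot: available=%d\n", slab_is_available());
	slab_state = PARTIAL;		/* first internal caches bootstrapped */
	printf("partial:    available=%d\n", slab_is_available());
	slab_state = UP;		/* kmem_cache_init() done */
	printf("up:         available=%d\n", slab_is_available());
	slab_state = FULL;		/* kmem_cache_init_late()/sysfs done */
	printf("full:       available=%d\n", slab_is_available());
	return 0;
}
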
diff --git a/mm/slab_common.c b/mm/slab_common.c
new file mode 100644
index 000000000000..aa3ca5bb01b5
--- /dev/null
+++ b/mm/slab_common.c
@@ -0,0 +1,120 @@
1/*
2 * Slab allocator functions that are independent of the allocator strategy
3 *
4 * (C) 2012 Christoph Lameter <cl@linux.com>
5 */
6#include <linux/slab.h>
7
8#include <linux/mm.h>
9#include <linux/poison.h>
10#include <linux/interrupt.h>
11#include <linux/memory.h>
12#include <linux/compiler.h>
13#include <linux/module.h>
14#include <linux/cpu.h>
15#include <linux/uaccess.h>
16#include <asm/cacheflush.h>
17#include <asm/tlbflush.h>
18#include <asm/page.h>
19
20#include "slab.h"
21
22enum slab_state slab_state;
23LIST_HEAD(slab_caches);
24DEFINE_MUTEX(slab_mutex);
25
26/*
27 * kmem_cache_create - Create a cache.
28 * @name: A string which is used in /proc/slabinfo to identify this cache.
29 * @size: The size of objects to be created in this cache.
30 * @align: The required alignment for the objects.
31 * @flags: SLAB flags
32 * @ctor: A constructor for the objects.
33 *
34 * Returns a ptr to the cache on success, NULL on failure.
35 * Cannot be called within an interrupt, but can be interrupted.
35 * Cannot be called within an interrupt, but can be interrupted.
36 * The @ctor is run when new pages are allocated by the cache.
37 *
38 * The flags are
39 *
40 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
41 * to catch references to uninitialised memory.
42 *
43 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
44 * for buffer overruns.
45 *
46 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
47 * cacheline. This can be beneficial if you're counting cycles as closely
48 * as davem.
49 */
50
51struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align,
52 unsigned long flags, void (*ctor)(void *))
53{
54 struct kmem_cache *s = NULL;
55
56#ifdef CONFIG_DEBUG_VM
57 if (!name || in_interrupt() || size < sizeof(void *) ||
58 size > KMALLOC_MAX_SIZE) {
59 printk(KERN_ERR "kmem_cache_create(%s) integrity check"
60 " failed\n", name);
61 goto out;
62 }
63#endif
64
65 get_online_cpus();
66 mutex_lock(&slab_mutex);
67
68#ifdef CONFIG_DEBUG_VM
69 list_for_each_entry(s, &slab_caches, list) {
70 char tmp;
71 int res;
72
73 /*
74 * This happens when the module gets unloaded and doesn't
75 * destroy its slab cache and no-one else reuses the vmalloc
76 * area of the module. Print a warning.
77 */
78 res = probe_kernel_address(s->name, tmp);
79 if (res) {
80 printk(KERN_ERR
81 "Slab cache with size %d has lost its name\n",
82 s->object_size);
83 continue;
84 }
85
86 if (!strcmp(s->name, name)) {
87 printk(KERN_ERR "kmem_cache_create(%s): Cache name"
88 " already exists.\n",
89 name);
90 dump_stack();
91 s = NULL;
92 goto oops;
93 }
94 }
95
96 WARN_ON(strchr(name, ' ')); /* It confuses parsers */
97#endif
98
99 s = __kmem_cache_create(name, size, align, flags, ctor);
100
101#ifdef CONFIG_DEBUG_VM
102oops:
103#endif
104 mutex_unlock(&slab_mutex);
105 put_online_cpus();
106
107#ifdef CONFIG_DEBUG_VM
108out:
109#endif
110 if (!s && (flags & SLAB_PANIC))
111 panic("kmem_cache_create: Failed to create slab '%s'\n", name);
112
113 return s;
114}
115EXPORT_SYMBOL(kmem_cache_create);
116
117int slab_is_available(void)
118{
119 return slab_state >= UP;
120}
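
Callers keep using the interface documented above; the common wrapper only performs the argument and duplicate-name checks before handing off to the allocator's __kmem_cache_create(). A hypothetical in-kernel user might look like the sketch below, where struct foo, foo_ctor() and the cache name are made-up placeholders:

/* Sketch of a typical kmem_cache_create() caller; all names are illustrative. */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/list.h>

struct foo {
	int id;
	struct list_head link;
};

static struct kmem_cache *foo_cache;

static void foo_ctor(void *obj)
{
	struct foo *f = obj;

	f->id = -1;
	INIT_LIST_HEAD(&f->link);
}

static int __init foo_init(void)
{
	foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
				      0, SLAB_HWCACHE_ALIGN, foo_ctor);
	return foo_cache ? 0 : -ENOMEM;
}

static void __exit foo_exit(void)
{
	kmem_cache_destroy(foo_cache);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");
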
diff --git a/mm/slob.c b/mm/slob.c
index 8105be42cad1..45d4ca79933a 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -59,6 +59,8 @@
59 59
60#include <linux/kernel.h> 60#include <linux/kernel.h>
61#include <linux/slab.h> 61#include <linux/slab.h>
62#include "slab.h"
63
62#include <linux/mm.h> 64#include <linux/mm.h>
63#include <linux/swap.h> /* struct reclaim_state */ 65#include <linux/swap.h> /* struct reclaim_state */
64#include <linux/cache.h> 66#include <linux/cache.h>
@@ -92,36 +94,6 @@ struct slob_block {
92typedef struct slob_block slob_t; 94typedef struct slob_block slob_t;
93 95
94/* 96/*
95 * We use struct page fields to manage some slob allocation aspects,
96 * however to avoid the horrible mess in include/linux/mm_types.h, we'll
97 * just define our own struct page type variant here.
98 */
99struct slob_page {
100 union {
101 struct {
102 unsigned long flags; /* mandatory */
103 atomic_t _count; /* mandatory */
104 slobidx_t units; /* free units left in page */
105 unsigned long pad[2];
106 slob_t *free; /* first free slob_t in page */
107 struct list_head list; /* linked list of free pages */
108 };
109 struct page page;
110 };
111};
112static inline void struct_slob_page_wrong_size(void)
113{ BUILD_BUG_ON(sizeof(struct slob_page) != sizeof(struct page)); }
114
115/*
116 * free_slob_page: call before a slob_page is returned to the page allocator.
117 */
118static inline void free_slob_page(struct slob_page *sp)
119{
120 reset_page_mapcount(&sp->page);
121 sp->page.mapping = NULL;
122}
123
124/*
125 * All partially free slob pages go on these lists. 97 * All partially free slob pages go on these lists.
126 */ 98 */
127#define SLOB_BREAK1 256 99#define SLOB_BREAK1 256
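
The removed struct slob_page existed only to give friendlier names to struct page storage through a size-checked union; now that struct page itself carries the fields SLOB needs (freelist, units, the page flags and list linkage), the overlay and its BUILD_BUG_ON-style size check can go. A userspace model of the two access styles, using a deliberately simplified stand-in for struct page:

/* Userspace model of the change: the old code renamed struct page fields
 * through a size-checked union overlay, the new code uses the struct page
 * fields directly.  "struct page" here is a simplified stand-in, not the
 * kernel's real layout.
 */
#include <stdio.h>

struct page {				/* simplified stand-in */
	unsigned long flags;
	void *freelist;			/* first free block in the page */
	int units;			/* free units left in the page */
};

struct slob_page_old {			/* the removed overlay, in miniature */
	union {
		struct {
			unsigned long flags;
			void *free;
			int units_left;
		};
		struct page page;
	};
};

/* The overlay is only safe while both views stay the same size. */
_Static_assert(sizeof(struct slob_page_old) == sizeof(struct page),
	       "overlay must match struct page");

int main(void)
{
	struct slob_page_old sp = { .page = { .freelist = NULL, .units = 64 } };

	printf("via the overlay: %d units\n", sp.units_left);	/* old style */
	printf("via struct page: %d units\n", sp.page.units);	/* new style */
	return 0;
}
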
@@ -131,46 +103,23 @@ static LIST_HEAD(free_slob_medium);
131static LIST_HEAD(free_slob_large); 103static LIST_HEAD(free_slob_large);
132 104
133/* 105/*
134 * is_slob_page: True for all slob pages (false for bigblock pages)
135 */
136static inline int is_slob_page(struct slob_page *sp)
137{
138 return PageSlab((struct page *)sp);
139}
140
141static inline void set_slob_page(struct slob_page *sp)
142{
143 __SetPageSlab((struct page *)sp);
144}
145
146static inline void clear_slob_page(struct slob_page *sp)
147{
148 __ClearPageSlab((struct page *)sp);
149}
150
151static inline struct slob_page *slob_page(const void *addr)
152{
153 return (struct slob_page *)virt_to_page(addr);
154}
155
156/*
157 * slob_page_free: true for pages on free_slob_pages list. 106 * slob_page_free: true for pages on free_slob_pages list.
158 */ 107 */
159static inline int slob_page_free(struct slob_page *sp) 108static inline int slob_page_free(struct page *sp)
160{ 109{
161 return PageSlobFree((struct page *)sp); 110 return PageSlobFree(sp);
162} 111}
163 112
164static void set_slob_page_free(struct slob_page *sp, struct list_head *list) 113static void set_slob_page_free(struct page *sp, struct list_head *list)
165{ 114{
166 list_add(&sp->list, list); 115 list_add(&sp->list, list);
167 __SetPageSlobFree((struct page *)sp); 116 __SetPageSlobFree(sp);
168} 117}
169 118
170static inline void clear_slob_page_free(struct slob_page *sp) 119static inline void clear_slob_page_free(struct page *sp)
171{ 120{
172 list_del(&sp->list); 121 list_del(&sp->list);
173 __ClearPageSlobFree((struct page *)sp); 122 __ClearPageSlobFree(sp);
174} 123}
175 124
176#define SLOB_UNIT sizeof(slob_t) 125#define SLOB_UNIT sizeof(slob_t)
@@ -267,12 +216,12 @@ static void slob_free_pages(void *b, int order)
267/* 216/*
268 * Allocate a slob block within a given slob_page sp. 217 * Allocate a slob block within a given slob_page sp.
269 */ 218 */
270static void *slob_page_alloc(struct slob_page *sp, size_t size, int align) 219static void *slob_page_alloc(struct page *sp, size_t size, int align)
271{ 220{
272 slob_t *prev, *cur, *aligned = NULL; 221 slob_t *prev, *cur, *aligned = NULL;
273 int delta = 0, units = SLOB_UNITS(size); 222 int delta = 0, units = SLOB_UNITS(size);
274 223
275 for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) { 224 for (prev = NULL, cur = sp->freelist; ; prev = cur, cur = slob_next(cur)) {
276 slobidx_t avail = slob_units(cur); 225 slobidx_t avail = slob_units(cur);
277 226
278 if (align) { 227 if (align) {
@@ -296,12 +245,12 @@ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
296 if (prev) 245 if (prev)
297 set_slob(prev, slob_units(prev), next); 246 set_slob(prev, slob_units(prev), next);
298 else 247 else
299 sp->free = next; 248 sp->freelist = next;
300 } else { /* fragment */ 249 } else { /* fragment */
301 if (prev) 250 if (prev)
302 set_slob(prev, slob_units(prev), cur + units); 251 set_slob(prev, slob_units(prev), cur + units);
303 else 252 else
304 sp->free = cur + units; 253 sp->freelist = cur + units;
305 set_slob(cur + units, avail - units, next); 254 set_slob(cur + units, avail - units, next);
306 } 255 }
307 256
@@ -320,7 +269,7 @@ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
320 */ 269 */
321static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) 270static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
322{ 271{
323 struct slob_page *sp; 272 struct page *sp;
324 struct list_head *prev; 273 struct list_head *prev;
325 struct list_head *slob_list; 274 struct list_head *slob_list;
326 slob_t *b = NULL; 275 slob_t *b = NULL;
@@ -341,7 +290,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
341 * If there's a node specification, search for a partial 290 * If there's a node specification, search for a partial
342 * page with a matching node id in the freelist. 291 * page with a matching node id in the freelist.
343 */ 292 */
344 if (node != -1 && page_to_nid(&sp->page) != node) 293 if (node != -1 && page_to_nid(sp) != node)
345 continue; 294 continue;
346#endif 295#endif
347 /* Enough room on this page? */ 296 /* Enough room on this page? */
@@ -369,12 +318,12 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
369 b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node); 318 b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
370 if (!b) 319 if (!b)
371 return NULL; 320 return NULL;
372 sp = slob_page(b); 321 sp = virt_to_page(b);
373 set_slob_page(sp); 322 __SetPageSlab(sp);
374 323
375 spin_lock_irqsave(&slob_lock, flags); 324 spin_lock_irqsave(&slob_lock, flags);
376 sp->units = SLOB_UNITS(PAGE_SIZE); 325 sp->units = SLOB_UNITS(PAGE_SIZE);
377 sp->free = b; 326 sp->freelist = b;
378 INIT_LIST_HEAD(&sp->list); 327 INIT_LIST_HEAD(&sp->list);
379 set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE)); 328 set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
380 set_slob_page_free(sp, slob_list); 329 set_slob_page_free(sp, slob_list);
@@ -392,7 +341,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
392 */ 341 */
393static void slob_free(void *block, int size) 342static void slob_free(void *block, int size)
394{ 343{
395 struct slob_page *sp; 344 struct page *sp;
396 slob_t *prev, *next, *b = (slob_t *)block; 345 slob_t *prev, *next, *b = (slob_t *)block;
397 slobidx_t units; 346 slobidx_t units;
398 unsigned long flags; 347 unsigned long flags;
@@ -402,7 +351,7 @@ static void slob_free(void *block, int size)
402 return; 351 return;
403 BUG_ON(!size); 352 BUG_ON(!size);
404 353
405 sp = slob_page(block); 354 sp = virt_to_page(block);
406 units = SLOB_UNITS(size); 355 units = SLOB_UNITS(size);
407 356
408 spin_lock_irqsave(&slob_lock, flags); 357 spin_lock_irqsave(&slob_lock, flags);
@@ -412,8 +361,8 @@ static void slob_free(void *block, int size)
412 if (slob_page_free(sp)) 361 if (slob_page_free(sp))
413 clear_slob_page_free(sp); 362 clear_slob_page_free(sp);
414 spin_unlock_irqrestore(&slob_lock, flags); 363 spin_unlock_irqrestore(&slob_lock, flags);
415 clear_slob_page(sp); 364 __ClearPageSlab(sp);
416 free_slob_page(sp); 365 reset_page_mapcount(sp);
417 slob_free_pages(b, 0); 366 slob_free_pages(b, 0);
418 return; 367 return;
419 } 368 }
@@ -421,7 +370,7 @@ static void slob_free(void *block, int size)
421 if (!slob_page_free(sp)) { 370 if (!slob_page_free(sp)) {
422 /* This slob page is about to become partially free. Easy! */ 371 /* This slob page is about to become partially free. Easy! */
423 sp->units = units; 372 sp->units = units;
424 sp->free = b; 373 sp->freelist = b;
425 set_slob(b, units, 374 set_slob(b, units,
426 (void *)((unsigned long)(b + 375 (void *)((unsigned long)(b +
427 SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK)); 376 SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
@@ -441,15 +390,15 @@ static void slob_free(void *block, int size)
441 */ 390 */
442 sp->units += units; 391 sp->units += units;
443 392
444 if (b < sp->free) { 393 if (b < (slob_t *)sp->freelist) {
445 if (b + units == sp->free) { 394 if (b + units == sp->freelist) {
446 units += slob_units(sp->free); 395 units += slob_units(sp->freelist);
447 sp->free = slob_next(sp->free); 396 sp->freelist = slob_next(sp->freelist);
448 } 397 }
449 set_slob(b, units, sp->free); 398 set_slob(b, units, sp->freelist);
450 sp->free = b; 399 sp->freelist = b;
451 } else { 400 } else {
452 prev = sp->free; 401 prev = sp->freelist;
453 next = slob_next(prev); 402 next = slob_next(prev);
454 while (b > next) { 403 while (b > next) {
455 prev = next; 404 prev = next;
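
The freeing path above keeps each page's free list sorted by address so that a returned block can be merged with its neighbours; the hunk itself only renames sp->free to the page's freelist field. A self-contained userspace sketch of address-ordered freeing with coalescing (illustrative only; SLOB additionally encodes sizes as SLOB_UNIT-sized units and keeps one such list per page):

/* Address-ordered free list with coalescing, kept inside the free blocks. */
#include <stdio.h>

struct blk {
	size_t size;		/* bytes, including this header */
	struct blk *next;	/* next free block, in ascending address order */
};

static struct blk *freelist;

static void free_blk(struct blk *b)
{
	struct blk *prev = NULL, *cur = freelist;

	/* Walk to the insertion point that keeps the list address ordered. */
	while (cur && (char *)cur < (char *)b) {
		prev = cur;
		cur = cur->next;
	}

	/* Merge with the following block if the two touch. */
	if (cur && (char *)b + b->size == (char *)cur) {
		b->size += cur->size;
		b->next = cur->next;
	} else {
		b->next = cur;
	}

	/* Merge with the preceding block if the two touch. */
	if (prev && (char *)prev + prev->size == (char *)b) {
		prev->size += b->size;
		prev->next = b->next;
	} else if (prev) {
		prev->next = b;
	} else {
		freelist = b;
	}
}

int main(void)
{
	static union { struct blk hdr; char pad[64]; } arena[3];
	struct blk *a = &arena[0].hdr, *b = &arena[1].hdr, *c = &arena[2].hdr;
	struct blk *cur;

	a->size = b->size = c->size = sizeof(arena[0]);
	free_blk(a);
	free_blk(c);		/* not adjacent to a: two list entries */
	free_blk(b);		/* bridges a and c: everything coalesces */

	for (cur = freelist; cur; cur = cur->next)
		printf("free block at %p, %zu bytes\n", (void *)cur, cur->size);
	return 0;
}
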
@@ -522,7 +471,7 @@ EXPORT_SYMBOL(__kmalloc_node);
522 471
523void kfree(const void *block) 472void kfree(const void *block)
524{ 473{
525 struct slob_page *sp; 474 struct page *sp;
526 475
527 trace_kfree(_RET_IP_, block); 476 trace_kfree(_RET_IP_, block);
528 477
@@ -530,43 +479,36 @@ void kfree(const void *block)
530 return; 479 return;
531 kmemleak_free(block); 480 kmemleak_free(block);
532 481
533 sp = slob_page(block); 482 sp = virt_to_page(block);
534 if (is_slob_page(sp)) { 483 if (PageSlab(sp)) {
535 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); 484 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
536 unsigned int *m = (unsigned int *)(block - align); 485 unsigned int *m = (unsigned int *)(block - align);
537 slob_free(m, *m + align); 486 slob_free(m, *m + align);
538 } else 487 } else
539 put_page(&sp->page); 488 put_page(sp);
540} 489}
541EXPORT_SYMBOL(kfree); 490EXPORT_SYMBOL(kfree);
542 491
543/* can't use ksize for kmem_cache_alloc memory, only kmalloc */ 492/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
544size_t ksize(const void *block) 493size_t ksize(const void *block)
545{ 494{
546 struct slob_page *sp; 495 struct page *sp;
547 496
548 BUG_ON(!block); 497 BUG_ON(!block);
549 if (unlikely(block == ZERO_SIZE_PTR)) 498 if (unlikely(block == ZERO_SIZE_PTR))
550 return 0; 499 return 0;
551 500
552 sp = slob_page(block); 501 sp = virt_to_page(block);
553 if (is_slob_page(sp)) { 502 if (PageSlab(sp)) {
554 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); 503 int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
555 unsigned int *m = (unsigned int *)(block - align); 504 unsigned int *m = (unsigned int *)(block - align);
556 return SLOB_UNITS(*m) * SLOB_UNIT; 505 return SLOB_UNITS(*m) * SLOB_UNIT;
557 } else 506 } else
558 return sp->page.private; 507 return sp->private;
559} 508}
560EXPORT_SYMBOL(ksize); 509EXPORT_SYMBOL(ksize);
561 510
562struct kmem_cache { 511struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
563 unsigned int size, align;
564 unsigned long flags;
565 const char *name;
566 void (*ctor)(void *);
567};
568
569struct kmem_cache *kmem_cache_create(const char *name, size_t size,
570 size_t align, unsigned long flags, void (*ctor)(void *)) 512 size_t align, unsigned long flags, void (*ctor)(void *))
571{ 513{
572 struct kmem_cache *c; 514 struct kmem_cache *c;
@@ -589,13 +531,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
589 c->align = ARCH_SLAB_MINALIGN; 531 c->align = ARCH_SLAB_MINALIGN;
590 if (c->align < align) 532 if (c->align < align)
591 c->align = align; 533 c->align = align;
592 } else if (flags & SLAB_PANIC)
593 panic("Cannot create slab cache %s\n", name);
594 534
595 kmemleak_alloc(c, sizeof(struct kmem_cache), 1, GFP_KERNEL); 535 kmemleak_alloc(c, sizeof(struct kmem_cache), 1, GFP_KERNEL);
536 c->refcount = 1;
537 }
596 return c; 538 return c;
597} 539}
598EXPORT_SYMBOL(kmem_cache_create);
599 540
600void kmem_cache_destroy(struct kmem_cache *c) 541void kmem_cache_destroy(struct kmem_cache *c)
601{ 542{
@@ -678,19 +619,12 @@ int kmem_cache_shrink(struct kmem_cache *d)
678} 619}
679EXPORT_SYMBOL(kmem_cache_shrink); 620EXPORT_SYMBOL(kmem_cache_shrink);
680 621
681static unsigned int slob_ready __read_mostly;
682
683int slab_is_available(void)
684{
685 return slob_ready;
686}
687
688void __init kmem_cache_init(void) 622void __init kmem_cache_init(void)
689{ 623{
690 slob_ready = 1; 624 slab_state = UP;
691} 625}
692 626
693void __init kmem_cache_init_late(void) 627void __init kmem_cache_init_late(void)
694{ 628{
695 /* Nothing to do */ 629 slab_state = FULL;
696} 630}
diff --git a/mm/slub.c b/mm/slub.c
index 8c691fa1cf3c..e517d435e5dc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,6 +16,7 @@
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include "slab.h"
19#include <linux/proc_fs.h> 20#include <linux/proc_fs.h>
20#include <linux/seq_file.h> 21#include <linux/seq_file.h>
21#include <linux/kmemcheck.h> 22#include <linux/kmemcheck.h>
@@ -35,13 +36,13 @@
35 36
36/* 37/*
37 * Lock order: 38 * Lock order:
38 * 1. slub_lock (Global Semaphore) 39 * 1. slab_mutex (Global Mutex)
39 * 2. node->list_lock 40 * 2. node->list_lock
40 * 3. slab_lock(page) (Only on some arches and for debugging) 41 * 3. slab_lock(page) (Only on some arches and for debugging)
41 * 42 *
42 * slub_lock 43 * slab_mutex
43 * 44 *
44 * The role of the slub_lock is to protect the list of all the slabs 45 * The role of the slab_mutex is to protect the list of all the slabs
45 * and to synchronize major metadata changes to slab cache structures. 46 * and to synchronize major metadata changes to slab cache structures.
46 * 47 *
47 * The slab_lock is only used for debugging and on arches that do not 48 * The slab_lock is only used for debugging and on arches that do not
@@ -182,17 +183,6 @@ static int kmem_size = sizeof(struct kmem_cache);
182static struct notifier_block slab_notifier; 183static struct notifier_block slab_notifier;
183#endif 184#endif
184 185
185static enum {
186 DOWN, /* No slab functionality available */
187 PARTIAL, /* Kmem_cache_node works */
188 UP, /* Everything works but does not show up in sysfs */
189 SYSFS /* Sysfs up */
190} slab_state = DOWN;
191
192/* A list of all slab caches on the system */
193static DECLARE_RWSEM(slub_lock);
194static LIST_HEAD(slab_caches);
195
196/* 186/*
197 * Tracking user of a slab. 187 * Tracking user of a slab.
198 */ 188 */
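
The updated comment fixes SLUB's documented lock hierarchy: slab_mutex first, then the node's list_lock, then slab_lock(page), with the private slub_lock rwsem and slab_state definitions dropped in favour of the common ones from mm/slab.h. Keeping such a hierarchy deadlock-free means every path acquires in the same order and releases in reverse; a generic pthread illustration of the rule (not SLUB code):

/* Generic illustration of a fixed lock hierarchy: outer before inner, always. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER; /* think: slab_mutex */
static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER; /* think: node->list_lock */

static int shared_count;

static void *worker(void *arg)
{
	(void)arg;
	/* Every path takes outer first, then inner; never the reverse. */
	pthread_mutex_lock(&outer);
	pthread_mutex_lock(&inner);
	shared_count++;
	pthread_mutex_unlock(&inner);
	pthread_mutex_unlock(&outer);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, worker, NULL);
	pthread_create(&b, NULL, worker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("count = %d\n", shared_count);
	return 0;
}
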
@@ -237,11 +227,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
237 * Core slab cache functions 227 * Core slab cache functions
238 *******************************************************************/ 228 *******************************************************************/
239 229
240int slab_is_available(void)
241{
242 return slab_state >= UP;
243}
244
245static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) 230static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
246{ 231{
247 return s->node[node]; 232 return s->node[node];
@@ -311,7 +296,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
311 * and whatever may come after it. 296 * and whatever may come after it.
312 */ 297 */
313 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) 298 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
314 return s->objsize; 299 return s->object_size;
315 300
316#endif 301#endif
317 /* 302 /*
@@ -609,11 +594,11 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
609 if (p > addr + 16) 594 if (p > addr + 16)
610 print_section("Bytes b4 ", p - 16, 16); 595 print_section("Bytes b4 ", p - 16, 16);
611 596
612 print_section("Object ", p, min_t(unsigned long, s->objsize, 597 print_section("Object ", p, min_t(unsigned long, s->object_size,
613 PAGE_SIZE)); 598 PAGE_SIZE));
614 if (s->flags & SLAB_RED_ZONE) 599 if (s->flags & SLAB_RED_ZONE)
615 print_section("Redzone ", p + s->objsize, 600 print_section("Redzone ", p + s->object_size,
616 s->inuse - s->objsize); 601 s->inuse - s->object_size);
617 602
618 if (s->offset) 603 if (s->offset)
619 off = s->offset + sizeof(void *); 604 off = s->offset + sizeof(void *);
@@ -655,12 +640,12 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
655 u8 *p = object; 640 u8 *p = object;
656 641
657 if (s->flags & __OBJECT_POISON) { 642 if (s->flags & __OBJECT_POISON) {
658 memset(p, POISON_FREE, s->objsize - 1); 643 memset(p, POISON_FREE, s->object_size - 1);
659 p[s->objsize - 1] = POISON_END; 644 p[s->object_size - 1] = POISON_END;
660 } 645 }
661 646
662 if (s->flags & SLAB_RED_ZONE) 647 if (s->flags & SLAB_RED_ZONE)
663 memset(p + s->objsize, val, s->inuse - s->objsize); 648 memset(p + s->object_size, val, s->inuse - s->object_size);
664} 649}
665 650
666static void restore_bytes(struct kmem_cache *s, char *message, u8 data, 651static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
@@ -705,10 +690,10 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
705 * Poisoning uses 0x6b (POISON_FREE) and the last byte is 690 * Poisoning uses 0x6b (POISON_FREE) and the last byte is
706 * 0xa5 (POISON_END) 691 * 0xa5 (POISON_END)
707 * 692 *
708 * object + s->objsize 693 * object + s->object_size
709 * Padding to reach word boundary. This is also used for Redzoning. 694 * Padding to reach word boundary. This is also used for Redzoning.
710 * Padding is extended by another word if Redzoning is enabled and 695 * Padding is extended by another word if Redzoning is enabled and
711 * objsize == inuse. 696 * object_size == inuse.
712 * 697 *
713 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with 698 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with
714 * 0xcc (RED_ACTIVE) for objects in use. 699 * 0xcc (RED_ACTIVE) for objects in use.
@@ -727,7 +712,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
727 * object + s->size 712 * object + s->size
728 * Nothing is used beyond s->size. 713 * Nothing is used beyond s->size.
729 * 714 *
730 * If slabcaches are merged then the objsize and inuse boundaries are mostly 715 * If slabcaches are merged then the object_size and inuse boundaries are mostly
731 * ignored. And therefore no slab options that rely on these boundaries 716 * ignored. And therefore no slab options that rely on these boundaries
732 * may be used with merged slabcaches. 717 * may be used with merged slabcaches.
733 */ 718 */
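
The layout comment above describes SLUB's debug view of an object: the payload up to s->object_size, poison bytes of 0x6b ending in 0xa5, and padding to the next word boundary that doubles as the red zone, with one extra word added when the object already ends word-aligned (the same rule appears later in calculate_sizes()). A tiny model of just those two sizing steps, ignoring the free pointer, tracking data and final alignment:

/* Simplified model of the red-zone sizing rules quoted above. */
#include <stdio.h>

#define WORD		sizeof(void *)
#define ALIGN_UP(x, a)	(((x) + (a) - 1) / (a) * (a))

static void layout(unsigned long object_size, int red_zone)
{
	unsigned long size = object_size;

	size = ALIGN_UP(size, WORD);		/* pad object end to a word boundary */
	if (red_zone && size == object_size)
		size += WORD;			/* extra word so there is a red zone to check */

	printf("object_size=%3lu red_zone=%d -> padded size=%lu (red zone bytes=%lu)\n",
	       object_size, red_zone, size, size - object_size);
}

int main(void)
{
	layout(52, 1);	/* 52 -> padded to 56; 4 red zone bytes on 64-bit */
	layout(56, 1);	/* already word-aligned -> one whole extra word of red zone */
	layout(52, 0);	/* no red zoning: the padding is just padding */
	return 0;
}
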
@@ -787,25 +772,25 @@ static int check_object(struct kmem_cache *s, struct page *page,
787 void *object, u8 val) 772 void *object, u8 val)
788{ 773{
789 u8 *p = object; 774 u8 *p = object;
790 u8 *endobject = object + s->objsize; 775 u8 *endobject = object + s->object_size;
791 776
792 if (s->flags & SLAB_RED_ZONE) { 777 if (s->flags & SLAB_RED_ZONE) {
793 if (!check_bytes_and_report(s, page, object, "Redzone", 778 if (!check_bytes_and_report(s, page, object, "Redzone",
794 endobject, val, s->inuse - s->objsize)) 779 endobject, val, s->inuse - s->object_size))
795 return 0; 780 return 0;
796 } else { 781 } else {
797 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { 782 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
798 check_bytes_and_report(s, page, p, "Alignment padding", 783 check_bytes_and_report(s, page, p, "Alignment padding",
799 endobject, POISON_INUSE, s->inuse - s->objsize); 784 endobject, POISON_INUSE, s->inuse - s->object_size);
800 } 785 }
801 } 786 }
802 787
803 if (s->flags & SLAB_POISON) { 788 if (s->flags & SLAB_POISON) {
804 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && 789 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
805 (!check_bytes_and_report(s, page, p, "Poison", p, 790 (!check_bytes_and_report(s, page, p, "Poison", p,
806 POISON_FREE, s->objsize - 1) || 791 POISON_FREE, s->object_size - 1) ||
807 !check_bytes_and_report(s, page, p, "Poison", 792 !check_bytes_and_report(s, page, p, "Poison",
808 p + s->objsize - 1, POISON_END, 1))) 793 p + s->object_size - 1, POISON_END, 1)))
809 return 0; 794 return 0;
810 /* 795 /*
811 * check_pad_bytes cleans up on its own. 796 * check_pad_bytes cleans up on its own.
@@ -926,7 +911,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
926 page->freelist); 911 page->freelist);
927 912
928 if (!alloc) 913 if (!alloc)
929 print_section("Object ", (void *)object, s->objsize); 914 print_section("Object ", (void *)object, s->object_size);
930 915
931 dump_stack(); 916 dump_stack();
932 } 917 }
@@ -942,14 +927,14 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
942 lockdep_trace_alloc(flags); 927 lockdep_trace_alloc(flags);
943 might_sleep_if(flags & __GFP_WAIT); 928 might_sleep_if(flags & __GFP_WAIT);
944 929
945 return should_failslab(s->objsize, flags, s->flags); 930 return should_failslab(s->object_size, flags, s->flags);
946} 931}
947 932
948static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) 933static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
949{ 934{
950 flags &= gfp_allowed_mask; 935 flags &= gfp_allowed_mask;
951 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); 936 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
952 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); 937 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
953} 938}
954 939
955static inline void slab_free_hook(struct kmem_cache *s, void *x) 940static inline void slab_free_hook(struct kmem_cache *s, void *x)
@@ -966,13 +951,13 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
966 unsigned long flags; 951 unsigned long flags;
967 952
968 local_irq_save(flags); 953 local_irq_save(flags);
969 kmemcheck_slab_free(s, x, s->objsize); 954 kmemcheck_slab_free(s, x, s->object_size);
970 debug_check_no_locks_freed(x, s->objsize); 955 debug_check_no_locks_freed(x, s->object_size);
971 local_irq_restore(flags); 956 local_irq_restore(flags);
972 } 957 }
973#endif 958#endif
974 if (!(s->flags & SLAB_DEBUG_OBJECTS)) 959 if (!(s->flags & SLAB_DEBUG_OBJECTS))
975 debug_check_no_obj_freed(x, s->objsize); 960 debug_check_no_obj_freed(x, s->object_size);
976} 961}
977 962
978/* 963/*
@@ -1207,7 +1192,7 @@ out:
1207 1192
1208__setup("slub_debug", setup_slub_debug); 1193__setup("slub_debug", setup_slub_debug);
1209 1194
1210static unsigned long kmem_cache_flags(unsigned long objsize, 1195static unsigned long kmem_cache_flags(unsigned long object_size,
1211 unsigned long flags, const char *name, 1196 unsigned long flags, const char *name,
1212 void (*ctor)(void *)) 1197 void (*ctor)(void *))
1213{ 1198{
@@ -1237,7 +1222,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page,
1237static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, 1222static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1238 struct page *page) {} 1223 struct page *page) {}
1239static inline void remove_full(struct kmem_cache *s, struct page *page) {} 1224static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1240static inline unsigned long kmem_cache_flags(unsigned long objsize, 1225static inline unsigned long kmem_cache_flags(unsigned long object_size,
1241 unsigned long flags, const char *name, 1226 unsigned long flags, const char *name,
1242 void (*ctor)(void *)) 1227 void (*ctor)(void *))
1243{ 1228{
@@ -1314,13 +1299,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1314 stat(s, ORDER_FALLBACK); 1299 stat(s, ORDER_FALLBACK);
1315 } 1300 }
1316 1301
1317 if (flags & __GFP_WAIT) 1302 if (kmemcheck_enabled && page
1318 local_irq_disable();
1319
1320 if (!page)
1321 return NULL;
1322
1323 if (kmemcheck_enabled
1324 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { 1303 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1325 int pages = 1 << oo_order(oo); 1304 int pages = 1 << oo_order(oo);
1326 1305
@@ -1336,6 +1315,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1336 kmemcheck_mark_unallocated_pages(page, pages); 1315 kmemcheck_mark_unallocated_pages(page, pages);
1337 } 1316 }
1338 1317
1318 if (flags & __GFP_WAIT)
1319 local_irq_disable();
1320 if (!page)
1321 return NULL;
1322
1339 page->objects = oo_objects(oo); 1323 page->objects = oo_objects(oo);
1340 mod_zone_page_state(page_zone(page), 1324 mod_zone_page_state(page_zone(page),
1341 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1325 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
@@ -1490,12 +1474,12 @@ static inline void remove_partial(struct kmem_cache_node *n,
1490} 1474}
1491 1475
1492/* 1476/*
1493 * Lock slab, remove from the partial list and put the object into the 1477 * Remove slab from the partial list, freeze it and
1494 * per cpu freelist. 1478 * return the pointer to the freelist.
1495 * 1479 *
1496 * Returns a list of objects or NULL if it fails. 1480 * Returns a list of objects or NULL if it fails.
1497 * 1481 *
1498 * Must hold list_lock. 1482 * Must hold list_lock since we modify the partial list.
1499 */ 1483 */
1500static inline void *acquire_slab(struct kmem_cache *s, 1484static inline void *acquire_slab(struct kmem_cache *s,
1501 struct kmem_cache_node *n, struct page *page, 1485 struct kmem_cache_node *n, struct page *page,
@@ -1510,26 +1494,27 @@ static inline void *acquire_slab(struct kmem_cache *s,
1510 * The old freelist is the list of objects for the 1494 * The old freelist is the list of objects for the
1511 * per cpu allocation list. 1495 * per cpu allocation list.
1512 */ 1496 */
1513 do { 1497 freelist = page->freelist;
1514 freelist = page->freelist; 1498 counters = page->counters;
1515 counters = page->counters; 1499 new.counters = counters;
1516 new.counters = counters; 1500 if (mode) {
1517 if (mode) { 1501 new.inuse = page->objects;
1518 new.inuse = page->objects; 1502 new.freelist = NULL;
1519 new.freelist = NULL; 1503 } else {
1520 } else { 1504 new.freelist = freelist;
1521 new.freelist = freelist; 1505 }
1522 }
1523 1506
1524 VM_BUG_ON(new.frozen); 1507 VM_BUG_ON(new.frozen);
1525 new.frozen = 1; 1508 new.frozen = 1;
1526 1509
1527 } while (!__cmpxchg_double_slab(s, page, 1510 if (!__cmpxchg_double_slab(s, page,
1528 freelist, counters, 1511 freelist, counters,
1529 new.freelist, new.counters, 1512 new.freelist, new.counters,
1530 "lock and freeze")); 1513 "acquire_slab"))
1514 return NULL;
1531 1515
1532 remove_partial(n, page); 1516 remove_partial(n, page);
1517 WARN_ON(!freelist);
1533 return freelist; 1518 return freelist;
1534} 1519}
1535 1520
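
acquire_slab() used to spin in a cmpxchg loop until it had "locked and frozen" the page; it now makes a single __cmpxchg_double_slab() attempt and returns NULL on contention, letting get_partial_node() move on to another slab. The same single-shot pattern with C11 atomics, as a generic sketch (an ordinary word-sized CAS, not the kernel's double-word cmpxchg):

/* Generic single-attempt claim: try one compare-and-swap, report failure
 * instead of looping, and let the caller pick another victim.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct slot {
	atomic_int frozen;	/* 0 = free to claim, 1 = claimed ("frozen") */
};

static struct slot s;		/* static storage: starts out zeroed */

static bool try_claim(struct slot *slot)
{
	int expected = 0;

	/* One attempt only; on contention the caller moves on. */
	return atomic_compare_exchange_strong(&slot->frozen, &expected, 1);
}

int main(void)
{
	printf("first claim:  %s\n", try_claim(&s) ? "ok" : "busy");
	printf("second claim: %s\n", try_claim(&s) ? "ok" : "busy");
	return 0;
}
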
@@ -1563,7 +1548,6 @@ static void *get_partial_node(struct kmem_cache *s,
1563 1548
1564 if (!object) { 1549 if (!object) {
1565 c->page = page; 1550 c->page = page;
1566 c->node = page_to_nid(page);
1567 stat(s, ALLOC_FROM_PARTIAL); 1551 stat(s, ALLOC_FROM_PARTIAL);
1568 object = t; 1552 object = t;
1569 available = page->objects - page->inuse; 1553 available = page->objects - page->inuse;
@@ -1617,7 +1601,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1617 1601
1618 do { 1602 do {
1619 cpuset_mems_cookie = get_mems_allowed(); 1603 cpuset_mems_cookie = get_mems_allowed();
1620 zonelist = node_zonelist(slab_node(current->mempolicy), flags); 1604 zonelist = node_zonelist(slab_node(), flags);
1621 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 1605 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1622 struct kmem_cache_node *n; 1606 struct kmem_cache_node *n;
1623 1607
@@ -1731,14 +1715,12 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
1731/* 1715/*
1732 * Remove the cpu slab 1716 * Remove the cpu slab
1733 */ 1717 */
1734static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1718static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist)
1735{ 1719{
1736 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; 1720 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1737 struct page *page = c->page;
1738 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1721 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1739 int lock = 0; 1722 int lock = 0;
1740 enum slab_modes l = M_NONE, m = M_NONE; 1723 enum slab_modes l = M_NONE, m = M_NONE;
1741 void *freelist;
1742 void *nextfree; 1724 void *nextfree;
1743 int tail = DEACTIVATE_TO_HEAD; 1725 int tail = DEACTIVATE_TO_HEAD;
1744 struct page new; 1726 struct page new;
@@ -1749,11 +1731,6 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1749 tail = DEACTIVATE_TO_TAIL; 1731 tail = DEACTIVATE_TO_TAIL;
1750 } 1732 }
1751 1733
1752 c->tid = next_tid(c->tid);
1753 c->page = NULL;
1754 freelist = c->freelist;
1755 c->freelist = NULL;
1756
1757 /* 1734 /*
1758 * Stage one: Free all available per cpu objects back 1735 * Stage one: Free all available per cpu objects back
1759 * to the page freelist while it is still frozen. Leave the 1736 * to the page freelist while it is still frozen. Leave the
@@ -1879,21 +1856,31 @@ redo:
1879 } 1856 }
1880} 1857}
1881 1858
1882/* Unfreeze all the cpu partial slabs */ 1859/*
1860 * Unfreeze all the cpu partial slabs.
1861 *
1862 * This function must be called with interrupts disabled.
1863 */
1883static void unfreeze_partials(struct kmem_cache *s) 1864static void unfreeze_partials(struct kmem_cache *s)
1884{ 1865{
1885 struct kmem_cache_node *n = NULL; 1866 struct kmem_cache_node *n = NULL, *n2 = NULL;
1886 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); 1867 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
1887 struct page *page, *discard_page = NULL; 1868 struct page *page, *discard_page = NULL;
1888 1869
1889 while ((page = c->partial)) { 1870 while ((page = c->partial)) {
1890 enum slab_modes { M_PARTIAL, M_FREE };
1891 enum slab_modes l, m;
1892 struct page new; 1871 struct page new;
1893 struct page old; 1872 struct page old;
1894 1873
1895 c->partial = page->next; 1874 c->partial = page->next;
1896 l = M_FREE; 1875
1876 n2 = get_node(s, page_to_nid(page));
1877 if (n != n2) {
1878 if (n)
1879 spin_unlock(&n->list_lock);
1880
1881 n = n2;
1882 spin_lock(&n->list_lock);
1883 }
1897 1884
1898 do { 1885 do {
1899 1886
@@ -1906,43 +1893,17 @@ static void unfreeze_partials(struct kmem_cache *s)
1906 1893
1907 new.frozen = 0; 1894 new.frozen = 0;
1908 1895
1909 if (!new.inuse && (!n || n->nr_partial > s->min_partial)) 1896 } while (!__cmpxchg_double_slab(s, page,
1910 m = M_FREE;
1911 else {
1912 struct kmem_cache_node *n2 = get_node(s,
1913 page_to_nid(page));
1914
1915 m = M_PARTIAL;
1916 if (n != n2) {
1917 if (n)
1918 spin_unlock(&n->list_lock);
1919
1920 n = n2;
1921 spin_lock(&n->list_lock);
1922 }
1923 }
1924
1925 if (l != m) {
1926 if (l == M_PARTIAL) {
1927 remove_partial(n, page);
1928 stat(s, FREE_REMOVE_PARTIAL);
1929 } else {
1930 add_partial(n, page,
1931 DEACTIVATE_TO_TAIL);
1932 stat(s, FREE_ADD_PARTIAL);
1933 }
1934
1935 l = m;
1936 }
1937
1938 } while (!cmpxchg_double_slab(s, page,
1939 old.freelist, old.counters, 1897 old.freelist, old.counters,
1940 new.freelist, new.counters, 1898 new.freelist, new.counters,
1941 "unfreezing slab")); 1899 "unfreezing slab"));
1942 1900
1943 if (m == M_FREE) { 1901 if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
1944 page->next = discard_page; 1902 page->next = discard_page;
1945 discard_page = page; 1903 discard_page = page;
1904 } else {
1905 add_partial(n, page, DEACTIVATE_TO_TAIL);
1906 stat(s, FREE_ADD_PARTIAL);
1946 } 1907 }
1947 } 1908 }
1948 1909
@@ -2011,7 +1972,11 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2011static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1972static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2012{ 1973{
2013 stat(s, CPUSLAB_FLUSH); 1974 stat(s, CPUSLAB_FLUSH);
2014 deactivate_slab(s, c); 1975 deactivate_slab(s, c->page, c->freelist);
1976
1977 c->tid = next_tid(c->tid);
1978 c->page = NULL;
1979 c->freelist = NULL;
2015} 1980}
2016 1981
2017/* 1982/*
@@ -2055,10 +2020,10 @@ static void flush_all(struct kmem_cache *s)
2055 * Check if the objects in a per cpu structure fit numa 2020 * Check if the objects in a per cpu structure fit numa
2056 * locality expectations. 2021 * locality expectations.
2057 */ 2022 */
2058static inline int node_match(struct kmem_cache_cpu *c, int node) 2023static inline int node_match(struct page *page, int node)
2059{ 2024{
2060#ifdef CONFIG_NUMA 2025#ifdef CONFIG_NUMA
2061 if (node != NUMA_NO_NODE && c->node != node) 2026 if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2062 return 0; 2027 return 0;
2063#endif 2028#endif
2064 return 1; 2029 return 1;
@@ -2101,10 +2066,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2101 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", 2066 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2102 nid, gfpflags); 2067 nid, gfpflags);
2103 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " 2068 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2104 "default order: %d, min order: %d\n", s->name, s->objsize, 2069 "default order: %d, min order: %d\n", s->name, s->object_size,
2105 s->size, oo_order(s->oo), oo_order(s->min)); 2070 s->size, oo_order(s->oo), oo_order(s->min));
2106 2071
2107 if (oo_order(s->min) > get_order(s->objsize)) 2072 if (oo_order(s->min) > get_order(s->object_size))
2108 printk(KERN_WARNING " %s debugging increased min order, use " 2073 printk(KERN_WARNING " %s debugging increased min order, use "
2109 "slub_debug=O to disable.\n", s->name); 2074 "slub_debug=O to disable.\n", s->name);
2110 2075
@@ -2130,10 +2095,16 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2130static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, 2095static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2131 int node, struct kmem_cache_cpu **pc) 2096 int node, struct kmem_cache_cpu **pc)
2132{ 2097{
2133 void *object; 2098 void *freelist;
2134 struct kmem_cache_cpu *c; 2099 struct kmem_cache_cpu *c = *pc;
2135 struct page *page = new_slab(s, flags, node); 2100 struct page *page;
2101
2102 freelist = get_partial(s, flags, node, c);
2136 2103
2104 if (freelist)
2105 return freelist;
2106
2107 page = new_slab(s, flags, node);
2137 if (page) { 2108 if (page) {
2138 c = __this_cpu_ptr(s->cpu_slab); 2109 c = __this_cpu_ptr(s->cpu_slab);
2139 if (c->page) 2110 if (c->page)
@@ -2143,17 +2114,16 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2143 * No other reference to the page yet so we can 2114 * No other reference to the page yet so we can
2144 * muck around with it freely without cmpxchg 2115 * muck around with it freely without cmpxchg
2145 */ 2116 */
2146 object = page->freelist; 2117 freelist = page->freelist;
2147 page->freelist = NULL; 2118 page->freelist = NULL;
2148 2119
2149 stat(s, ALLOC_SLAB); 2120 stat(s, ALLOC_SLAB);
2150 c->node = page_to_nid(page);
2151 c->page = page; 2121 c->page = page;
2152 *pc = c; 2122 *pc = c;
2153 } else 2123 } else
2154 object = NULL; 2124 freelist = NULL;
2155 2125
2156 return object; 2126 return freelist;
2157} 2127}
2158 2128
2159/* 2129/*
@@ -2163,6 +2133,8 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2163 * The page is still frozen if the return value is not NULL. 2133 * The page is still frozen if the return value is not NULL.
2164 * 2134 *
2165 * If this function returns NULL then the page has been unfrozen. 2135 * If this function returns NULL then the page has been unfrozen.
2136 *
2137 * This function must be called with interrupts disabled.
2166 */ 2138 */
2167static inline void *get_freelist(struct kmem_cache *s, struct page *page) 2139static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2168{ 2140{
@@ -2173,13 +2145,14 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2173 do { 2145 do {
2174 freelist = page->freelist; 2146 freelist = page->freelist;
2175 counters = page->counters; 2147 counters = page->counters;
2148
2176 new.counters = counters; 2149 new.counters = counters;
2177 VM_BUG_ON(!new.frozen); 2150 VM_BUG_ON(!new.frozen);
2178 2151
2179 new.inuse = page->objects; 2152 new.inuse = page->objects;
2180 new.frozen = freelist != NULL; 2153 new.frozen = freelist != NULL;
2181 2154
2182 } while (!cmpxchg_double_slab(s, page, 2155 } while (!__cmpxchg_double_slab(s, page,
2183 freelist, counters, 2156 freelist, counters,
2184 NULL, new.counters, 2157 NULL, new.counters,
2185 "get_freelist")); 2158 "get_freelist"));
@@ -2206,7 +2179,8 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2206static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, 2179static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2207 unsigned long addr, struct kmem_cache_cpu *c) 2180 unsigned long addr, struct kmem_cache_cpu *c)
2208{ 2181{
2209 void **object; 2182 void *freelist;
2183 struct page *page;
2210 unsigned long flags; 2184 unsigned long flags;
2211 2185
2212 local_irq_save(flags); 2186 local_irq_save(flags);
@@ -2219,25 +2193,29 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2219 c = this_cpu_ptr(s->cpu_slab); 2193 c = this_cpu_ptr(s->cpu_slab);
2220#endif 2194#endif
2221 2195
2222 if (!c->page) 2196 page = c->page;
2197 if (!page)
2223 goto new_slab; 2198 goto new_slab;
2224redo: 2199redo:
2225 if (unlikely(!node_match(c, node))) { 2200
2201 if (unlikely(!node_match(page, node))) {
2226 stat(s, ALLOC_NODE_MISMATCH); 2202 stat(s, ALLOC_NODE_MISMATCH);
2227 deactivate_slab(s, c); 2203 deactivate_slab(s, page, c->freelist);
2204 c->page = NULL;
2205 c->freelist = NULL;
2228 goto new_slab; 2206 goto new_slab;
2229 } 2207 }
2230 2208
2231 /* must check again c->freelist in case of cpu migration or IRQ */ 2209 /* must check again c->freelist in case of cpu migration or IRQ */
2232 object = c->freelist; 2210 freelist = c->freelist;
2233 if (object) 2211 if (freelist)
2234 goto load_freelist; 2212 goto load_freelist;
2235 2213
2236 stat(s, ALLOC_SLOWPATH); 2214 stat(s, ALLOC_SLOWPATH);
2237 2215
2238 object = get_freelist(s, c->page); 2216 freelist = get_freelist(s, page);
2239 2217
2240 if (!object) { 2218 if (!freelist) {
2241 c->page = NULL; 2219 c->page = NULL;
2242 stat(s, DEACTIVATE_BYPASS); 2220 stat(s, DEACTIVATE_BYPASS);
2243 goto new_slab; 2221 goto new_slab;
@@ -2246,50 +2224,50 @@ redo:
2246 stat(s, ALLOC_REFILL); 2224 stat(s, ALLOC_REFILL);
2247 2225
2248load_freelist: 2226load_freelist:
2249 c->freelist = get_freepointer(s, object); 2227 /*
2228 * freelist is pointing to the list of objects to be used.
2229 * page is pointing to the page from which the objects are obtained.
2230 * That page must be frozen for per cpu allocations to work.
2231 */
2232 VM_BUG_ON(!c->page->frozen);
2233 c->freelist = get_freepointer(s, freelist);
2250 c->tid = next_tid(c->tid); 2234 c->tid = next_tid(c->tid);
2251 local_irq_restore(flags); 2235 local_irq_restore(flags);
2252 return object; 2236 return freelist;
2253 2237
2254new_slab: 2238new_slab:
2255 2239
2256 if (c->partial) { 2240 if (c->partial) {
2257 c->page = c->partial; 2241 page = c->page = c->partial;
2258 c->partial = c->page->next; 2242 c->partial = page->next;
2259 c->node = page_to_nid(c->page);
2260 stat(s, CPU_PARTIAL_ALLOC); 2243 stat(s, CPU_PARTIAL_ALLOC);
2261 c->freelist = NULL; 2244 c->freelist = NULL;
2262 goto redo; 2245 goto redo;
2263 } 2246 }
2264 2247
2265 /* Then do expensive stuff like retrieving pages from the partial lists */ 2248 freelist = new_slab_objects(s, gfpflags, node, &c);
2266 object = get_partial(s, gfpflags, node, c);
2267 2249
2268 if (unlikely(!object)) { 2250 if (unlikely(!freelist)) {
2251 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2252 slab_out_of_memory(s, gfpflags, node);
2269 2253
2270 object = new_slab_objects(s, gfpflags, node, &c); 2254 local_irq_restore(flags);
2271 2255 return NULL;
2272 if (unlikely(!object)) {
2273 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2274 slab_out_of_memory(s, gfpflags, node);
2275
2276 local_irq_restore(flags);
2277 return NULL;
2278 }
2279 } 2256 }
2280 2257
2258 page = c->page;
2281 if (likely(!kmem_cache_debug(s))) 2259 if (likely(!kmem_cache_debug(s)))
2282 goto load_freelist; 2260 goto load_freelist;
2283 2261
2284 /* Only entered in the debug case */ 2262 /* Only entered in the debug case */
2285 if (!alloc_debug_processing(s, c->page, object, addr)) 2263 if (!alloc_debug_processing(s, page, freelist, addr))
2286 goto new_slab; /* Slab failed checks. Next slab needed */ 2264 goto new_slab; /* Slab failed checks. Next slab needed */
2287 2265
2288 c->freelist = get_freepointer(s, object); 2266 deactivate_slab(s, page, get_freepointer(s, freelist));
2289 deactivate_slab(s, c); 2267 c->page = NULL;
2290 c->node = NUMA_NO_NODE; 2268 c->freelist = NULL;
2291 local_irq_restore(flags); 2269 local_irq_restore(flags);
2292 return object; 2270 return freelist;
2293} 2271}
2294 2272
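
The restructured slow path keeps the familiar shape: a NUMA mismatch deactivates the current page, get_freelist() refills from the still-frozen c->page, and only then does allocation fall back to the cpu partial list and to new_slab_objects(), which now tries the node partial lists before allocating a fresh slab. A deliberately simplified userspace analogue of that fastpath/slowpath split, using a per-thread cache in front of a locked global pool (none of SLUB's lockless tid/cmpxchg machinery, no frozen pages; just the control-flow shape):

/* Simplified fastpath/slowpath split: allocate from a per-thread free list
 * when possible, refill in a batch from a locked global pool otherwise.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define BATCH	4

struct obj {
	struct obj *next;
};

static struct obj *global_pool;
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static __thread struct obj *local_freelist;

static struct obj *slow_alloc(void)
{
	struct obj *o;
	int i;

	pthread_mutex_lock(&pool_lock);
	for (i = 0; i < BATCH && global_pool; i++) {	/* refill a batch */
		o = global_pool;
		global_pool = o->next;
		o->next = local_freelist;
		local_freelist = o;
	}
	pthread_mutex_unlock(&pool_lock);

	if (!local_freelist)		/* pool empty: fall back to the system */
		return malloc(sizeof(struct obj));

	o = local_freelist;
	local_freelist = o->next;
	return o;
}

static struct obj *fast_alloc(void)
{
	struct obj *o = local_freelist;

	if (!o)				/* fastpath miss: take the slow path */
		return slow_alloc();
	local_freelist = o->next;	/* fastpath hit: pop the local list */
	return o;
}

int main(void)
{
	struct obj *a = fast_alloc();	/* slowpath: the local cache is empty */
	struct obj *b = fast_alloc();	/* also slowpath: the demo pool starts empty */

	printf("allocated %p and %p\n", (void *)a, (void *)b);
	free(a);
	free(b);
	return 0;
}
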
2295/* 2273/*
@@ -2307,6 +2285,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
2307{ 2285{
2308 void **object; 2286 void **object;
2309 struct kmem_cache_cpu *c; 2287 struct kmem_cache_cpu *c;
2288 struct page *page;
2310 unsigned long tid; 2289 unsigned long tid;
2311 2290
2312 if (slab_pre_alloc_hook(s, gfpflags)) 2291 if (slab_pre_alloc_hook(s, gfpflags))
@@ -2332,7 +2311,8 @@ redo:
2332 barrier(); 2311 barrier();
2333 2312
2334 object = c->freelist; 2313 object = c->freelist;
2335 if (unlikely(!object || !node_match(c, node))) 2314 page = c->page;
2315 if (unlikely(!object || !node_match(page, node)))
2336 2316
2337 object = __slab_alloc(s, gfpflags, node, addr, c); 2317 object = __slab_alloc(s, gfpflags, node, addr, c);
2338 2318
@@ -2364,7 +2344,7 @@ redo:
2364 } 2344 }
2365 2345
2366 if (unlikely(gfpflags & __GFP_ZERO) && object) 2346 if (unlikely(gfpflags & __GFP_ZERO) && object)
2367 memset(object, 0, s->objsize); 2347 memset(object, 0, s->object_size);
2368 2348
2369 slab_post_alloc_hook(s, gfpflags, object); 2349 slab_post_alloc_hook(s, gfpflags, object);
2370 2350
@@ -2375,7 +2355,7 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2375{ 2355{
2376 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); 2356 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2377 2357
2378 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); 2358 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags);
2379 2359
2380 return ret; 2360 return ret;
2381} 2361}
@@ -2405,7 +2385,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2405 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); 2385 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2406 2386
2407 trace_kmem_cache_alloc_node(_RET_IP_, ret, 2387 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2408 s->objsize, s->size, gfpflags, node); 2388 s->object_size, s->size, gfpflags, node);
2409 2389
2410 return ret; 2390 return ret;
2411} 2391}
@@ -2900,7 +2880,7 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min)
2900static int calculate_sizes(struct kmem_cache *s, int forced_order) 2880static int calculate_sizes(struct kmem_cache *s, int forced_order)
2901{ 2881{
2902 unsigned long flags = s->flags; 2882 unsigned long flags = s->flags;
2903 unsigned long size = s->objsize; 2883 unsigned long size = s->object_size;
2904 unsigned long align = s->align; 2884 unsigned long align = s->align;
2905 int order; 2885 int order;
2906 2886
@@ -2929,7 +2909,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
2929 * end of the object and the free pointer. If not then add an 2909 * end of the object and the free pointer. If not then add an
2930 * additional word to have some bytes to store Redzone information. 2910 * additional word to have some bytes to store Redzone information.
2931 */ 2911 */
2932 if ((flags & SLAB_RED_ZONE) && size == s->objsize) 2912 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
2933 size += sizeof(void *); 2913 size += sizeof(void *);
2934#endif 2914#endif
2935 2915
@@ -2977,7 +2957,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
2977 * user specified and the dynamic determination of cache line size 2957 * user specified and the dynamic determination of cache line size
2978 * on bootup. 2958 * on bootup.
2979 */ 2959 */
2980 align = calculate_alignment(flags, align, s->objsize); 2960 align = calculate_alignment(flags, align, s->object_size);
2981 s->align = align; 2961 s->align = align;
2982 2962
2983 /* 2963 /*
@@ -3025,7 +3005,7 @@ static int kmem_cache_open(struct kmem_cache *s,
3025 memset(s, 0, kmem_size); 3005 memset(s, 0, kmem_size);
3026 s->name = name; 3006 s->name = name;
3027 s->ctor = ctor; 3007 s->ctor = ctor;
3028 s->objsize = size; 3008 s->object_size = size;
3029 s->align = align; 3009 s->align = align;
3030 s->flags = kmem_cache_flags(size, flags, name, ctor); 3010 s->flags = kmem_cache_flags(size, flags, name, ctor);
3031 s->reserved = 0; 3011 s->reserved = 0;
@@ -3040,7 +3020,7 @@ static int kmem_cache_open(struct kmem_cache *s,
3040 * Disable debugging flags that store metadata if the min slab 3020 * Disable debugging flags that store metadata if the min slab
3041 * order increased. 3021 * order increased.
3042 */ 3022 */
3043 if (get_order(s->size) > get_order(s->objsize)) { 3023 if (get_order(s->size) > get_order(s->object_size)) {
3044 s->flags &= ~DEBUG_METADATA_FLAGS; 3024 s->flags &= ~DEBUG_METADATA_FLAGS;
3045 s->offset = 0; 3025 s->offset = 0;
3046 if (!calculate_sizes(s, -1)) 3026 if (!calculate_sizes(s, -1))
@@ -3114,7 +3094,7 @@ error:
3114 */ 3094 */
3115unsigned int kmem_cache_size(struct kmem_cache *s) 3095unsigned int kmem_cache_size(struct kmem_cache *s)
3116{ 3096{
3117 return s->objsize; 3097 return s->object_size;
3118} 3098}
3119EXPORT_SYMBOL(kmem_cache_size); 3099EXPORT_SYMBOL(kmem_cache_size);
3120 3100
@@ -3192,11 +3172,11 @@ static inline int kmem_cache_close(struct kmem_cache *s)
3192 */ 3172 */
3193void kmem_cache_destroy(struct kmem_cache *s) 3173void kmem_cache_destroy(struct kmem_cache *s)
3194{ 3174{
3195 down_write(&slub_lock); 3175 mutex_lock(&slab_mutex);
3196 s->refcount--; 3176 s->refcount--;
3197 if (!s->refcount) { 3177 if (!s->refcount) {
3198 list_del(&s->list); 3178 list_del(&s->list);
3199 up_write(&slub_lock); 3179 mutex_unlock(&slab_mutex);
3200 if (kmem_cache_close(s)) { 3180 if (kmem_cache_close(s)) {
3201 printk(KERN_ERR "SLUB %s: %s called for cache that " 3181 printk(KERN_ERR "SLUB %s: %s called for cache that "
3202 "still has objects.\n", s->name, __func__); 3182 "still has objects.\n", s->name, __func__);
@@ -3206,7 +3186,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
3206 rcu_barrier(); 3186 rcu_barrier();
3207 sysfs_slab_remove(s); 3187 sysfs_slab_remove(s);
3208 } else 3188 } else
3209 up_write(&slub_lock); 3189 mutex_unlock(&slab_mutex);
3210} 3190}
3211EXPORT_SYMBOL(kmem_cache_destroy); 3191EXPORT_SYMBOL(kmem_cache_destroy);
3212 3192
@@ -3268,7 +3248,7 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name,
3268 3248
3269 /* 3249 /*
3270 * This function is called with IRQs disabled during early-boot on 3250 * This function is called with IRQs disabled during early-boot on
3271 * single CPU so there's no need to take slub_lock here. 3251 * single CPU so there's no need to take slab_mutex here.
3272 */ 3252 */
3273 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, 3253 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
3274 flags, NULL)) 3254 flags, NULL))
@@ -3553,10 +3533,10 @@ static int slab_mem_going_offline_callback(void *arg)
3553{ 3533{
3554 struct kmem_cache *s; 3534 struct kmem_cache *s;
3555 3535
3556 down_read(&slub_lock); 3536 mutex_lock(&slab_mutex);
3557 list_for_each_entry(s, &slab_caches, list) 3537 list_for_each_entry(s, &slab_caches, list)
3558 kmem_cache_shrink(s); 3538 kmem_cache_shrink(s);
3559 up_read(&slub_lock); 3539 mutex_unlock(&slab_mutex);
3560 3540
3561 return 0; 3541 return 0;
3562} 3542}
@@ -3577,7 +3557,7 @@ static void slab_mem_offline_callback(void *arg)
3577 if (offline_node < 0) 3557 if (offline_node < 0)
3578 return; 3558 return;
3579 3559
3580 down_read(&slub_lock); 3560 mutex_lock(&slab_mutex);
3581 list_for_each_entry(s, &slab_caches, list) { 3561 list_for_each_entry(s, &slab_caches, list) {
3582 n = get_node(s, offline_node); 3562 n = get_node(s, offline_node);
3583 if (n) { 3563 if (n) {
@@ -3593,7 +3573,7 @@ static void slab_mem_offline_callback(void *arg)
3593 kmem_cache_free(kmem_cache_node, n); 3573 kmem_cache_free(kmem_cache_node, n);
3594 } 3574 }
3595 } 3575 }
3596 up_read(&slub_lock); 3576 mutex_unlock(&slab_mutex);
3597} 3577}
3598 3578
3599static int slab_mem_going_online_callback(void *arg) 3579static int slab_mem_going_online_callback(void *arg)
@@ -3616,7 +3596,7 @@ static int slab_mem_going_online_callback(void *arg)
3616 * allocate a kmem_cache_node structure in order to bring the node 3596 * allocate a kmem_cache_node structure in order to bring the node
3617 * online. 3597 * online.
3618 */ 3598 */
3619 down_read(&slub_lock); 3599 mutex_lock(&slab_mutex);
3620 list_for_each_entry(s, &slab_caches, list) { 3600 list_for_each_entry(s, &slab_caches, list) {
3621 /* 3601 /*
3622 * XXX: kmem_cache_alloc_node will fallback to other nodes 3602 * XXX: kmem_cache_alloc_node will fallback to other nodes
@@ -3632,7 +3612,7 @@ static int slab_mem_going_online_callback(void *arg)
3632 s->node[nid] = n; 3612 s->node[nid] = n;
3633 } 3613 }
3634out: 3614out:
3635 up_read(&slub_lock); 3615 mutex_unlock(&slab_mutex);
3636 return ret; 3616 return ret;
3637} 3617}
3638 3618
@@ -3843,11 +3823,11 @@ void __init kmem_cache_init(void)
3843 3823
3844 if (s && s->size) { 3824 if (s && s->size) {
3845 char *name = kasprintf(GFP_NOWAIT, 3825 char *name = kasprintf(GFP_NOWAIT,
3846 "dma-kmalloc-%d", s->objsize); 3826 "dma-kmalloc-%d", s->object_size);
3847 3827
3848 BUG_ON(!name); 3828 BUG_ON(!name);
3849 kmalloc_dma_caches[i] = create_kmalloc_cache(name, 3829 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3850 s->objsize, SLAB_CACHE_DMA); 3830 s->object_size, SLAB_CACHE_DMA);
3851 } 3831 }
3852 } 3832 }
3853#endif 3833#endif
@@ -3924,16 +3904,12 @@ static struct kmem_cache *find_mergeable(size_t size,
3924 return NULL; 3904 return NULL;
3925} 3905}
3926 3906
3927struct kmem_cache *kmem_cache_create(const char *name, size_t size, 3907struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
3928 size_t align, unsigned long flags, void (*ctor)(void *)) 3908 size_t align, unsigned long flags, void (*ctor)(void *))
3929{ 3909{
3930 struct kmem_cache *s; 3910 struct kmem_cache *s;
3931 char *n; 3911 char *n;
3932 3912
3933 if (WARN_ON(!name))
3934 return NULL;
3935
3936 down_write(&slub_lock);
3937 s = find_mergeable(size, align, flags, name, ctor); 3913 s = find_mergeable(size, align, flags, name, ctor);
3938 if (s) { 3914 if (s) {
3939 s->refcount++; 3915 s->refcount++;
@@ -3941,49 +3917,42 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3941 * Adjust the object sizes so that we clear 3917 * Adjust the object sizes so that we clear
3942 * the complete object on kzalloc. 3918 * the complete object on kzalloc.
3943 */ 3919 */
3944 s->objsize = max(s->objsize, (int)size); 3920 s->object_size = max(s->object_size, (int)size);
3945 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3921 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3946 3922
3947 if (sysfs_slab_alias(s, name)) { 3923 if (sysfs_slab_alias(s, name)) {
3948 s->refcount--; 3924 s->refcount--;
3949 goto err; 3925 return NULL;
3950 } 3926 }
3951 up_write(&slub_lock);
3952 return s; 3927 return s;
3953 } 3928 }
3954 3929
3955 n = kstrdup(name, GFP_KERNEL); 3930 n = kstrdup(name, GFP_KERNEL);
3956 if (!n) 3931 if (!n)
3957 goto err; 3932 return NULL;
3958 3933
3959 s = kmalloc(kmem_size, GFP_KERNEL); 3934 s = kmalloc(kmem_size, GFP_KERNEL);
3960 if (s) { 3935 if (s) {
3961 if (kmem_cache_open(s, n, 3936 if (kmem_cache_open(s, n,
3962 size, align, flags, ctor)) { 3937 size, align, flags, ctor)) {
3938 int r;
3939
3963 list_add(&s->list, &slab_caches); 3940 list_add(&s->list, &slab_caches);
3964 up_write(&slub_lock); 3941 mutex_unlock(&slab_mutex);
3965 if (sysfs_slab_add(s)) { 3942 r = sysfs_slab_add(s);
3966 down_write(&slub_lock); 3943 mutex_lock(&slab_mutex);
3967 list_del(&s->list); 3944
3968 kfree(n); 3945 if (!r)
3969 kfree(s); 3946 return s;
3970 goto err; 3947
3971 } 3948 list_del(&s->list);
3972 return s; 3949 kmem_cache_close(s);
3973 } 3950 }
3974 kfree(s); 3951 kfree(s);
3975 } 3952 }
3976 kfree(n); 3953 kfree(n);
3977err: 3954 return NULL;
3978 up_write(&slub_lock);
3979
3980 if (flags & SLAB_PANIC)
3981 panic("Cannot create slabcache %s\n", name);
3982 else
3983 s = NULL;
3984 return s;
3985} 3955}
3986EXPORT_SYMBOL(kmem_cache_create);
3987 3956
3988#ifdef CONFIG_SMP 3957#ifdef CONFIG_SMP
3989/* 3958/*
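The rename to __kmem_cache_create() goes hand in hand with the deletions above: the WARN_ON(!name) check, the slub_lock handling and the SLAB_PANIC fallback all move into a common kmem_cache_create() wrapper, which is also why this function now runs with slab_mutex already held and only drops it temporarily around sysfs_slab_add(). A simplified sketch of that wrapper; the panic string is reused from the deleted SLUB code, and the real common version may add CPU-hotplug pinning and further sanity checks:

/* mm/slab_common.c -- simplified sketch of the common entry point */
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
			size_t align, unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s = NULL;

	if (WARN_ON(!name))
		goto out;

	mutex_lock(&slab_mutex);
	s = __kmem_cache_create(name, size, align, flags, ctor);
	mutex_unlock(&slab_mutex);

out:
	if (!s && (flags & SLAB_PANIC))
		panic("Cannot create slabcache %s\n", name);

	return s;
}
EXPORT_SYMBOL(kmem_cache_create);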
@@ -4002,13 +3971,13 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
4002 case CPU_UP_CANCELED_FROZEN: 3971 case CPU_UP_CANCELED_FROZEN:
4003 case CPU_DEAD: 3972 case CPU_DEAD:
4004 case CPU_DEAD_FROZEN: 3973 case CPU_DEAD_FROZEN:
4005 down_read(&slub_lock); 3974 mutex_lock(&slab_mutex);
4006 list_for_each_entry(s, &slab_caches, list) { 3975 list_for_each_entry(s, &slab_caches, list) {
4007 local_irq_save(flags); 3976 local_irq_save(flags);
4008 __flush_cpu_slab(s, cpu); 3977 __flush_cpu_slab(s, cpu);
4009 local_irq_restore(flags); 3978 local_irq_restore(flags);
4010 } 3979 }
4011 up_read(&slub_lock); 3980 mutex_unlock(&slab_mutex);
4012 break; 3981 break;
4013 default: 3982 default:
4014 break; 3983 break;
@@ -4500,30 +4469,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4500 4469
4501 for_each_possible_cpu(cpu) { 4470 for_each_possible_cpu(cpu) {
4502 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 4471 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4503 int node = ACCESS_ONCE(c->node); 4472 int node;
4504 struct page *page; 4473 struct page *page;
4505 4474
4506 if (node < 0)
4507 continue;
4508 page = ACCESS_ONCE(c->page); 4475 page = ACCESS_ONCE(c->page);
4509 if (page) { 4476 if (!page)
4510 if (flags & SO_TOTAL) 4477 continue;
4511 x = page->objects;
4512 else if (flags & SO_OBJECTS)
4513 x = page->inuse;
4514 else
4515 x = 1;
4516 4478
4517 total += x; 4479 node = page_to_nid(page);
4518 nodes[node] += x; 4480 if (flags & SO_TOTAL)
4519 } 4481 x = page->objects;
4520 page = c->partial; 4482 else if (flags & SO_OBJECTS)
4483 x = page->inuse;
4484 else
4485 x = 1;
4521 4486
4487 total += x;
4488 nodes[node] += x;
4489
4490 page = ACCESS_ONCE(c->partial);
4522 if (page) { 4491 if (page) {
4523 x = page->pobjects; 4492 x = page->pobjects;
4524 total += x; 4493 total += x;
4525 nodes[node] += x; 4494 nodes[node] += x;
4526 } 4495 }
4496
4527 per_cpu[node]++; 4497 per_cpu[node]++;
4528 } 4498 }
4529 } 4499 }
@@ -4623,7 +4593,7 @@ SLAB_ATTR_RO(align);
4623 4593
4624static ssize_t object_size_show(struct kmem_cache *s, char *buf) 4594static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4625{ 4595{
4626 return sprintf(buf, "%d\n", s->objsize); 4596 return sprintf(buf, "%d\n", s->object_size);
4627} 4597}
4628SLAB_ATTR_RO(object_size); 4598SLAB_ATTR_RO(object_size);
4629 4599
@@ -5286,7 +5256,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
5286 const char *name; 5256 const char *name;
5287 int unmergeable; 5257 int unmergeable;
5288 5258
5289 if (slab_state < SYSFS) 5259 if (slab_state < FULL)
5290 /* Defer until later */ 5260 /* Defer until later */
5291 return 0; 5261 return 0;
5292 5262
@@ -5331,7 +5301,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
5331 5301
5332static void sysfs_slab_remove(struct kmem_cache *s) 5302static void sysfs_slab_remove(struct kmem_cache *s)
5333{ 5303{
5334 if (slab_state < SYSFS) 5304 if (slab_state < FULL)
5335 /* 5305 /*
5336 * Sysfs has not been setup yet so no need to remove the 5306 * Sysfs has not been setup yet so no need to remove the
5337 * cache from sysfs. 5307 * cache from sysfs.
@@ -5359,7 +5329,7 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5359{ 5329{
5360 struct saved_alias *al; 5330 struct saved_alias *al;
5361 5331
5362 if (slab_state == SYSFS) { 5332 if (slab_state == FULL) {
5363 /* 5333 /*
5364 * If we have a leftover link then remove it. 5334 * If we have a leftover link then remove it.
5365 */ 5335 */
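The SYSFS to FULL switch in these sysfs helpers, and again in slab_sysfs_init() below, replaces the SLUB-private state constant with a boot-state enum shared by all three allocators. A sketch of that shared enum as it is expected to appear in mm/slab.h; only FULL is visible in this diff, the other members and their comments are an assumption:

/* mm/slab.h -- sketch of the common boot-state tracking */
enum slab_state {
	DOWN,			/* No slab functionality yet */
	PARTIAL,		/* SLUB: kmem_cache_node cache usable */
	PARTIAL_ARRAYCACHE,	/* SLAB: kmalloc size for the arraycache usable */
	PARTIAL_L3,		/* SLAB: kmalloc size for the node lists usable */
	UP,			/* Slab caches usable, sysfs/proc not wired up yet */
	FULL			/* Everything fully working */
};

extern enum slab_state slab_state;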
@@ -5383,16 +5353,16 @@ static int __init slab_sysfs_init(void)
5383 struct kmem_cache *s; 5353 struct kmem_cache *s;
5384 int err; 5354 int err;
5385 5355
5386 down_write(&slub_lock); 5356 mutex_lock(&slab_mutex);
5387 5357
5388 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); 5358 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5389 if (!slab_kset) { 5359 if (!slab_kset) {
5390 up_write(&slub_lock); 5360 mutex_unlock(&slab_mutex);
5391 printk(KERN_ERR "Cannot register slab subsystem.\n"); 5361 printk(KERN_ERR "Cannot register slab subsystem.\n");
5392 return -ENOSYS; 5362 return -ENOSYS;
5393 } 5363 }
5394 5364
5395 slab_state = SYSFS; 5365 slab_state = FULL;
5396 5366
5397 list_for_each_entry(s, &slab_caches, list) { 5367 list_for_each_entry(s, &slab_caches, list) {
5398 err = sysfs_slab_add(s); 5368 err = sysfs_slab_add(s);
@@ -5408,11 +5378,11 @@ static int __init slab_sysfs_init(void)
5408 err = sysfs_slab_alias(al->s, al->name); 5378 err = sysfs_slab_alias(al->s, al->name);
5409 if (err) 5379 if (err)
5410 printk(KERN_ERR "SLUB: Unable to add boot slab alias" 5380 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
5411 " %s to sysfs\n", s->name); 5381 " %s to sysfs\n", al->name);
5412 kfree(al); 5382 kfree(al);
5413 } 5383 }
5414 5384
5415 up_write(&slub_lock); 5385 mutex_unlock(&slab_mutex);
5416 resiliency_test(); 5386 resiliency_test();
5417 return 0; 5387 return 0;
5418} 5388}
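Besides the lock conversion, this hunk also fixes which name is printed when registering a boot-time alias fails: s is merely the leftover iterator of the list_for_each_entry() walk over slab_caches above and no longer points at a valid cache, while al is the alias actually being added. A sketch of the surrounding loop; the while() header and the alias_list name are not shown in the hunk and are assumptions here:

	while (alias_list) {				/* assumed loop header */
		struct saved_alias *al = alias_list;

		alias_list = alias_list->next;
		err = sysfs_slab_alias(al->s, al->name);
		if (err)
			printk(KERN_ERR "SLUB: Unable to add boot slab alias"
					" %s to sysfs\n", al->name);	/* was s->name */
		kfree(al);
	}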
@@ -5427,7 +5397,7 @@ __initcall(slab_sysfs_init);
5427static void print_slabinfo_header(struct seq_file *m) 5397static void print_slabinfo_header(struct seq_file *m)
5428{ 5398{
5429 seq_puts(m, "slabinfo - version: 2.1\n"); 5399 seq_puts(m, "slabinfo - version: 2.1\n");
5430 seq_puts(m, "# name <active_objs> <num_objs> <objsize> " 5400 seq_puts(m, "# name <active_objs> <num_objs> <object_size> "
5431 "<objperslab> <pagesperslab>"); 5401 "<objperslab> <pagesperslab>");
5432 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); 5402 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
5433 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); 5403 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
@@ -5438,7 +5408,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
5438{ 5408{
5439 loff_t n = *pos; 5409 loff_t n = *pos;
5440 5410
5441 down_read(&slub_lock); 5411 mutex_lock(&slab_mutex);
5442 if (!n) 5412 if (!n)
5443 print_slabinfo_header(m); 5413 print_slabinfo_header(m);
5444 5414
@@ -5452,7 +5422,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos)
5452 5422
5453static void s_stop(struct seq_file *m, void *p) 5423static void s_stop(struct seq_file *m, void *p)
5454{ 5424{
5455 up_read(&slub_lock); 5425 mutex_unlock(&slab_mutex);
5456} 5426}
5457 5427
5458static int s_show(struct seq_file *m, void *p) 5428static int s_show(struct seq_file *m, void *p)