author     Linus Torvalds <torvalds@linux-foundation.org>  2012-07-30 14:32:24 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-07-30 14:32:24 -0400
commit     720d85075b7ed3617de8ca8d9097390e303e9f60 (patch)
tree       3ce3911aa3f948b94949440954503c9f1b10ee64 /mm/slub.c
parent     637e49ae4f5b4a82b418dae8435e16132b298b7e (diff)
parent     73a1180e140d45cb9ef5fbab103d3bbfc4c84606 (diff)
Merge branch 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
Pull SLAB changes from Pekka Enberg:
 "Most of the changes included are from Christoph Lameter's "common slab"
  patch series that unifies common parts of SLUB, SLAB, and SLOB
  allocators.  The unification is needed for Glauber Costa's "kmem memcg"
  work that will hopefully appear for v3.7.

  The rest of the changes are fixes and speedups by various people."

* 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux: (32 commits)
  mm: Fix build warning in kmem_cache_create()
  slob: Fix early boot kernel crash
  mm, slub: ensure irqs are enabled for kmemcheck
  mm, sl[aou]b: Move kmem_cache_create mutex handling to common code
  mm, sl[aou]b: Use a common mutex definition
  mm, sl[aou]b: Common definition for boot state of the slab allocators
  mm, sl[aou]b: Extract common code for kmem_cache_create()
  slub: remove invalid reference to list iterator variable
  mm: Fix signal SIGFPE in slabinfo.c.
  slab: move FULL state transition to an initcall
  slab: Fix a typo in commit 8c138b "slab: Get rid of obj_size macro"
  mm, slab: Build fix for recent kmem_cache changes
  slab: rename gfpflags to allocflags
  slub: refactoring unfreeze_partials()
  slub: use __cmpxchg_double_slab() at interrupt disabled place
  slab/mempolicy: always use local policy from interrupt context
  slab: Get rid of obj_size macro
  mm, sl[aou]b: Extract common fields from struct kmem_cache
  slab: Remove some accessors
  slab: Use page struct fields instead of casting
  ...
Diffstat (limited to 'mm/slub.c')
-rw-r--r--  mm/slub.c  436
1 file changed, 203 insertions(+), 233 deletions(-)
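
The most visible structural change in the SLUB diff below is that kmem_cache_create() becomes __kmem_cache_create(): the WARN_ON(!name) check, the global lock handling (slub_lock, now slab_mutex) and the SLAB_PANIC fallback all disappear from mm/slub.c because the series moves them into a wrapper shared by SLUB, SLAB and SLOB. That wrapper lives outside this file and is not part of this diff; the following is only a rough sketch of the shared pattern, reusing the slab_mutex and __kmem_cache_create() names that do appear below, and the exact sanity checks and CPU-hotplug handling are assumptions.

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/cpu.h>		/* get_online_cpus()/put_online_cpus() */
#include "slab.h"		/* slab_mutex, __kmem_cache_create() */

struct kmem_cache *kmem_cache_create(const char *name, size_t size,
			size_t align, unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s = NULL;

	if (WARN_ON(!name))			/* check moved out of SLUB */
		goto out;

	get_online_cpus();
	mutex_lock(&slab_mutex);		/* replaces SLUB's slub_lock */

	s = __kmem_cache_create(name, size, align, flags, ctor);

	mutex_unlock(&slab_mutex);
	put_online_cpus();
out:
	if (!s && (flags & SLAB_PANIC))		/* SLAB_PANIC handled here, once */
		panic("kmem_cache_create: Failed to create slab '%s'\n", name);

	return s;
}
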
diff --git a/mm/slub.c b/mm/slub.c
index 8c691fa1cf3c..e517d435e5dc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,6 +16,7 @@
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include "slab.h"
19#include <linux/proc_fs.h> 20#include <linux/proc_fs.h>
20#include <linux/seq_file.h> 21#include <linux/seq_file.h>
21#include <linux/kmemcheck.h> 22#include <linux/kmemcheck.h>
@@ -35,13 +36,13 @@
35 36
36/* 37/*
37 * Lock order: 38 * Lock order:
38 * 1. slub_lock (Global Semaphore) 39 * 1. slab_mutex (Global Mutex)
39 * 2. node->list_lock 40 * 2. node->list_lock
40 * 3. slab_lock(page) (Only on some arches and for debugging) 41 * 3. slab_lock(page) (Only on some arches and for debugging)
41 * 42 *
42 * slub_lock 43 * slab_mutex
43 * 44 *
44 * The role of the slub_lock is to protect the list of all the slabs 45 * The role of the slab_mutex is to protect the list of all the slabs
45 * and to synchronize major metadata changes to slab cache structures. 46 * and to synchronize major metadata changes to slab cache structures.
46 * 47 *
47 * The slab_lock is only used for debugging and on arches that do not 48 * The slab_lock is only used for debugging and on arches that do not
@@ -182,17 +183,6 @@ static int kmem_size = sizeof(struct kmem_cache);
182static struct notifier_block slab_notifier; 183static struct notifier_block slab_notifier;
183#endif 184#endif
184 185
185static enum {
186 DOWN, /* No slab functionality available */
187 PARTIAL, /* Kmem_cache_node works */
188 UP, /* Everything works but does not show up in sysfs */
189 SYSFS /* Sysfs up */
190} slab_state = DOWN;
191
192/* A list of all slab caches on the system */
193static DECLARE_RWSEM(slub_lock);
194static LIST_HEAD(slab_caches);
195
196/* 186/*
197 * Tracking user of a slab. 187 * Tracking user of a slab.
198 */ 188 */
@@ -237,11 +227,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
237 * Core slab cache functions 227 * Core slab cache functions
238 *******************************************************************/ 228 *******************************************************************/
239 229
240int slab_is_available(void)
241{
242 return slab_state >= UP;
243}
244
245static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) 230static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
246{ 231{
247 return s->node[node]; 232 return s->node[node];
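
The two hunks above delete SLUB's private copies of the boot-state enum, the slub_lock rwsem, the slab_caches list and the slab_is_available() helper; together with the new #include "slab.h" at the top of the diff, they are replaced by definitions shared across the allocators ("mm, sl[aou]b: Use a common mutex definition", "Common definition for boot state of the slab allocators"). The shared header itself is not shown in this diff, so the sketch below is an assumption reconstructed from the names used elsewhere in this file (slab_state, FULL, slab_mutex, slab_caches); the real header also carries SLAB-specific intermediate states, and slab_is_available() becomes a common one-liner that still tests slab_state >= UP.

/* mm/slab.h (sketch) -- internal state shared by SLUB, SLAB and SLOB */
#include <linux/mutex.h>
#include <linux/list.h>

enum slab_state {
	DOWN,		/* No slab functionality yet */
	PARTIAL,	/* SLUB: kmem_cache_node works */
	UP,		/* Slab caches usable but not everything is set up */
	FULL		/* Everything works; replaces SLUB's old SYSFS state */
};

extern enum slab_state slab_state;

/* The slab_mutex protects the list of all slab caches and major
 * metadata changes; it replaces SLUB's slub_lock rwsem. */
extern struct mutex slab_mutex;
extern struct list_head slab_caches;
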
@@ -311,7 +296,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
311 * and whatever may come after it. 296 * and whatever may come after it.
312 */ 297 */
313 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) 298 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
314 return s->objsize; 299 return s->object_size;
315 300
316#endif 301#endif
317 /* 302 /*
@@ -609,11 +594,11 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
609 if (p > addr + 16) 594 if (p > addr + 16)
610 print_section("Bytes b4 ", p - 16, 16); 595 print_section("Bytes b4 ", p - 16, 16);
611 596
612 print_section("Object ", p, min_t(unsigned long, s->objsize, 597 print_section("Object ", p, min_t(unsigned long, s->object_size,
613 PAGE_SIZE)); 598 PAGE_SIZE));
614 if (s->flags & SLAB_RED_ZONE) 599 if (s->flags & SLAB_RED_ZONE)
615 print_section("Redzone ", p + s->objsize, 600 print_section("Redzone ", p + s->object_size,
616 s->inuse - s->objsize); 601 s->inuse - s->object_size);
617 602
618 if (s->offset) 603 if (s->offset)
619 off = s->offset + sizeof(void *); 604 off = s->offset + sizeof(void *);
@@ -655,12 +640,12 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
655 u8 *p = object; 640 u8 *p = object;
656 641
657 if (s->flags & __OBJECT_POISON) { 642 if (s->flags & __OBJECT_POISON) {
658 memset(p, POISON_FREE, s->objsize - 1); 643 memset(p, POISON_FREE, s->object_size - 1);
659 p[s->objsize - 1] = POISON_END; 644 p[s->object_size - 1] = POISON_END;
660 } 645 }
661 646
662 if (s->flags & SLAB_RED_ZONE) 647 if (s->flags & SLAB_RED_ZONE)
663 memset(p + s->objsize, val, s->inuse - s->objsize); 648 memset(p + s->object_size, val, s->inuse - s->object_size);
664} 649}
665 650
666static void restore_bytes(struct kmem_cache *s, char *message, u8 data, 651static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
@@ -705,10 +690,10 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
705 * Poisoning uses 0x6b (POISON_FREE) and the last byte is 690 * Poisoning uses 0x6b (POISON_FREE) and the last byte is
706 * 0xa5 (POISON_END) 691 * 0xa5 (POISON_END)
707 * 692 *
708 * object + s->objsize 693 * object + s->object_size
709 * Padding to reach word boundary. This is also used for Redzoning. 694 * Padding to reach word boundary. This is also used for Redzoning.
710 * Padding is extended by another word if Redzoning is enabled and 695 * Padding is extended by another word if Redzoning is enabled and
711 * objsize == inuse. 696 * object_size == inuse.
712 * 697 *
713 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with 698 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with
714 * 0xcc (RED_ACTIVE) for objects in use. 699 * 0xcc (RED_ACTIVE) for objects in use.
@@ -727,7 +712,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
727 * object + s->size 712 * object + s->size
728 * Nothing is used beyond s->size. 713 * Nothing is used beyond s->size.
729 * 714 *
730 * If slabcaches are merged then the objsize and inuse boundaries are mostly 715 * If slabcaches are merged then the object_size and inuse boundaries are mostly
731 * ignored. And therefore no slab options that rely on these boundaries 716 * ignored. And therefore no slab options that rely on these boundaries
732 * may be used with merged slabcaches. 717 * may be used with merged slabcaches.
733 */ 718 */
@@ -787,25 +772,25 @@ static int check_object(struct kmem_cache *s, struct page *page,
787 void *object, u8 val) 772 void *object, u8 val)
788{ 773{
789 u8 *p = object; 774 u8 *p = object;
790 u8 *endobject = object + s->objsize; 775 u8 *endobject = object + s->object_size;
791 776
792 if (s->flags & SLAB_RED_ZONE) { 777 if (s->flags & SLAB_RED_ZONE) {
793 if (!check_bytes_and_report(s, page, object, "Redzone", 778 if (!check_bytes_and_report(s, page, object, "Redzone",
794 endobject, val, s->inuse - s->objsize)) 779 endobject, val, s->inuse - s->object_size))
795 return 0; 780 return 0;
796 } else { 781 } else {
797 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { 782 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
798 check_bytes_and_report(s, page, p, "Alignment padding", 783 check_bytes_and_report(s, page, p, "Alignment padding",
799 endobject, POISON_INUSE, s->inuse - s->objsize); 784 endobject, POISON_INUSE, s->inuse - s->object_size);
800 } 785 }
801 } 786 }
802 787
803 if (s->flags & SLAB_POISON) { 788 if (s->flags & SLAB_POISON) {
804 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && 789 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
805 (!check_bytes_and_report(s, page, p, "Poison", p, 790 (!check_bytes_and_report(s, page, p, "Poison", p,
806 POISON_FREE, s->objsize - 1) || 791 POISON_FREE, s->object_size - 1) ||
807 !check_bytes_and_report(s, page, p, "Poison", 792 !check_bytes_and_report(s, page, p, "Poison",
808 p + s->objsize - 1, POISON_END, 1))) 793 p + s->object_size - 1, POISON_END, 1)))
809 return 0; 794 return 0;
810 /* 795 /*
811 * check_pad_bytes cleans up on its own. 796 * check_pad_bytes cleans up on its own.
@@ -926,7 +911,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
926 page->freelist); 911 page->freelist);
927 912
928 if (!alloc) 913 if (!alloc)
929 print_section("Object ", (void *)object, s->objsize); 914 print_section("Object ", (void *)object, s->object_size);
930 915
931 dump_stack(); 916 dump_stack();
932 } 917 }
@@ -942,14 +927,14 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
942 lockdep_trace_alloc(flags); 927 lockdep_trace_alloc(flags);
943 might_sleep_if(flags & __GFP_WAIT); 928 might_sleep_if(flags & __GFP_WAIT);
944 929
945 return should_failslab(s->objsize, flags, s->flags); 930 return should_failslab(s->object_size, flags, s->flags);
946} 931}
947 932
948static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) 933static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
949{ 934{
950 flags &= gfp_allowed_mask; 935 flags &= gfp_allowed_mask;
951 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); 936 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
952 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); 937 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
953} 938}
954 939
955static inline void slab_free_hook(struct kmem_cache *s, void *x) 940static inline void slab_free_hook(struct kmem_cache *s, void *x)
@@ -966,13 +951,13 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
966 unsigned long flags; 951 unsigned long flags;
967 952
968 local_irq_save(flags); 953 local_irq_save(flags);
969 kmemcheck_slab_free(s, x, s->objsize); 954 kmemcheck_slab_free(s, x, s->object_size);
970 debug_check_no_locks_freed(x, s->objsize); 955 debug_check_no_locks_freed(x, s->object_size);
971 local_irq_restore(flags); 956 local_irq_restore(flags);
972 } 957 }
973#endif 958#endif
974 if (!(s->flags & SLAB_DEBUG_OBJECTS)) 959 if (!(s->flags & SLAB_DEBUG_OBJECTS))
975 debug_check_no_obj_freed(x, s->objsize); 960 debug_check_no_obj_freed(x, s->object_size);
976} 961}
977 962
978/* 963/*
@@ -1207,7 +1192,7 @@ out:
1207 1192
1208__setup("slub_debug", setup_slub_debug); 1193__setup("slub_debug", setup_slub_debug);
1209 1194
1210static unsigned long kmem_cache_flags(unsigned long objsize, 1195static unsigned long kmem_cache_flags(unsigned long object_size,
1211 unsigned long flags, const char *name, 1196 unsigned long flags, const char *name,
1212 void (*ctor)(void *)) 1197 void (*ctor)(void *))
1213{ 1198{
@@ -1237,7 +1222,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page,
1237static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, 1222static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1238 struct page *page) {} 1223 struct page *page) {}
1239static inline void remove_full(struct kmem_cache *s, struct page *page) {} 1224static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1240static inline unsigned long kmem_cache_flags(unsigned long objsize, 1225static inline unsigned long kmem_cache_flags(unsigned long object_size,
1241 unsigned long flags, const char *name, 1226 unsigned long flags, const char *name,
1242 void (*ctor)(void *)) 1227 void (*ctor)(void *))
1243{ 1228{
@@ -1314,13 +1299,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1314 stat(s, ORDER_FALLBACK); 1299 stat(s, ORDER_FALLBACK);
1315 } 1300 }
1316 1301
1317 if (flags & __GFP_WAIT) 1302 if (kmemcheck_enabled && page
1318 local_irq_disable();
1319
1320 if (!page)
1321 return NULL;
1322
1323 if (kmemcheck_enabled
1324 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { 1303 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1325 int pages = 1 << oo_order(oo); 1304 int pages = 1 << oo_order(oo);
1326 1305
@@ -1336,6 +1315,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1336 kmemcheck_mark_unallocated_pages(page, pages); 1315 kmemcheck_mark_unallocated_pages(page, pages);
1337 } 1316 }
1338 1317
1318 if (flags & __GFP_WAIT)
1319 local_irq_disable();
1320 if (!page)
1321 return NULL;
1322
1339 page->objects = oo_objects(oo); 1323 page->objects = oo_objects(oo);
1340 mod_zone_page_state(page_zone(page), 1324 mod_zone_page_state(page_zone(page),
1341 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1325 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
@@ -1490,12 +1474,12 @@ static inline void remove_partial(struct kmem_cache_node *n,
1490} 1474}
1491 1475
1492/* 1476/*
1493 * Lock slab, remove from the partial list and put the object into the 1477 * Remove slab from the partial list, freeze it and
1494 * per cpu freelist. 1478 * return the pointer to the freelist.
1495 * 1479 *
1496 * Returns a list of objects or NULL if it fails. 1480 * Returns a list of objects or NULL if it fails.
1497 * 1481 *
1498 * Must hold list_lock. 1482 * Must hold list_lock since we modify the partial list.
1499 */ 1483 */
1500static inline void *acquire_slab(struct kmem_cache *s, 1484static inline void *acquire_slab(struct kmem_cache *s,
1501 struct kmem_cache_node *n, struct page *page, 1485 struct kmem_cache_node *n, struct page *page,
@@ -1510,26 +1494,27 @@ static inline void *acquire_slab(struct kmem_cache *s,
1510 * The old freelist is the list of objects for the 1494 * The old freelist is the list of objects for the
1511 * per cpu allocation list. 1495 * per cpu allocation list.
1512 */ 1496 */
1513 do { 1497 freelist = page->freelist;
1514 freelist = page->freelist; 1498 counters = page->counters;
1515 counters = page->counters; 1499 new.counters = counters;
1516 new.counters = counters; 1500 if (mode) {
1517 if (mode) { 1501 new.inuse = page->objects;
1518 new.inuse = page->objects; 1502 new.freelist = NULL;
1519 new.freelist = NULL; 1503 } else {
1520 } else { 1504 new.freelist = freelist;
1521 new.freelist = freelist; 1505 }
1522 }
1523 1506
1524 VM_BUG_ON(new.frozen); 1507 VM_BUG_ON(new.frozen);
1525 new.frozen = 1; 1508 new.frozen = 1;
1526 1509
1527 } while (!__cmpxchg_double_slab(s, page, 1510 if (!__cmpxchg_double_slab(s, page,
1528 freelist, counters, 1511 freelist, counters,
1529 new.freelist, new.counters, 1512 new.freelist, new.counters,
1530 "lock and freeze")); 1513 "acquire_slab"))
1514 return NULL;
1531 1515
1532 remove_partial(n, page); 1516 remove_partial(n, page);
1517 WARN_ON(!freelist);
1533 return freelist; 1518 return freelist;
1534} 1519}
1535 1520
@@ -1563,7 +1548,6 @@ static void *get_partial_node(struct kmem_cache *s,
1563 1548
1564 if (!object) { 1549 if (!object) {
1565 c->page = page; 1550 c->page = page;
1566 c->node = page_to_nid(page);
1567 stat(s, ALLOC_FROM_PARTIAL); 1551 stat(s, ALLOC_FROM_PARTIAL);
1568 object = t; 1552 object = t;
1569 available = page->objects - page->inuse; 1553 available = page->objects - page->inuse;
@@ -1617,7 +1601,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1617 1601
1618 do { 1602 do {
1619 cpuset_mems_cookie = get_mems_allowed(); 1603 cpuset_mems_cookie = get_mems_allowed();
1620 zonelist = node_zonelist(slab_node(current->mempolicy), flags); 1604 zonelist = node_zonelist(slab_node(), flags);
1621 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 1605 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1622 struct kmem_cache_node *n; 1606 struct kmem_cache_node *n;
1623 1607
@@ -1731,14 +1715,12 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
1731/* 1715/*
1732 * Remove the cpu slab 1716 * Remove the cpu slab
1733 */ 1717 */
1734static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1718static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist)
1735{ 1719{
1736 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; 1720 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1737 struct page *page = c->page;
1738 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1721 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1739 int lock = 0; 1722 int lock = 0;
1740 enum slab_modes l = M_NONE, m = M_NONE; 1723 enum slab_modes l = M_NONE, m = M_NONE;
1741 void *freelist;
1742 void *nextfree; 1724 void *nextfree;
1743 int tail = DEACTIVATE_TO_HEAD; 1725 int tail = DEACTIVATE_TO_HEAD;
1744 struct page new; 1726 struct page new;
@@ -1749,11 +1731,6 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1749 tail = DEACTIVATE_TO_TAIL; 1731 tail = DEACTIVATE_TO_TAIL;
1750 } 1732 }
1751 1733
1752 c->tid = next_tid(c->tid);
1753 c->page = NULL;
1754 freelist = c->freelist;
1755 c->freelist = NULL;
1756
1757 /* 1734 /*
1758 * Stage one: Free all available per cpu objects back 1735 * Stage one: Free all available per cpu objects back
1759 * to the page freelist while it is still frozen. Leave the 1736 * to the page freelist while it is still frozen. Leave the
@@ -1879,21 +1856,31 @@ redo:
1879 } 1856 }
1880} 1857}
1881 1858
1882/* Unfreeze all the cpu partial slabs */ 1859/*
1860 * Unfreeze all the cpu partial slabs.
1861 *
1862 * This function must be called with interrupt disabled.
1863 */
1883static void unfreeze_partials(struct kmem_cache *s) 1864static void unfreeze_partials(struct kmem_cache *s)
1884{ 1865{
1885 struct kmem_cache_node *n = NULL; 1866 struct kmem_cache_node *n = NULL, *n2 = NULL;
1886 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); 1867 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
1887 struct page *page, *discard_page = NULL; 1868 struct page *page, *discard_page = NULL;
1888 1869
1889 while ((page = c->partial)) { 1870 while ((page = c->partial)) {
1890 enum slab_modes { M_PARTIAL, M_FREE };
1891 enum slab_modes l, m;
1892 struct page new; 1871 struct page new;
1893 struct page old; 1872 struct page old;
1894 1873
1895 c->partial = page->next; 1874 c->partial = page->next;
1896 l = M_FREE; 1875
1876 n2 = get_node(s, page_to_nid(page));
1877 if (n != n2) {
1878 if (n)
1879 spin_unlock(&n->list_lock);
1880
1881 n = n2;
1882 spin_lock(&n->list_lock);
1883 }
1897 1884
1898 do { 1885 do {
1899 1886
@@ -1906,43 +1893,17 @@ static void unfreeze_partials(struct kmem_cache *s)
1906 1893
1907 new.frozen = 0; 1894 new.frozen = 0;
1908 1895
1909 if (!new.inuse && (!n || n->nr_partial > s->min_partial)) 1896 } while (!__cmpxchg_double_slab(s, page,
1910 m = M_FREE;
1911 else {
1912 struct kmem_cache_node *n2 = get_node(s,
1913 page_to_nid(page));
1914
1915 m = M_PARTIAL;
1916 if (n != n2) {
1917 if (n)
1918 spin_unlock(&n->list_lock);
1919
1920 n = n2;
1921 spin_lock(&n->list_lock);
1922 }
1923 }
1924
1925 if (l != m) {
1926 if (l == M_PARTIAL) {
1927 remove_partial(n, page);
1928 stat(s, FREE_REMOVE_PARTIAL);
1929 } else {
1930 add_partial(n, page,
1931 DEACTIVATE_TO_TAIL);
1932 stat(s, FREE_ADD_PARTIAL);
1933 }
1934
1935 l = m;
1936 }
1937
1938 } while (!cmpxchg_double_slab(s, page,
1939 old.freelist, old.counters, 1897 old.freelist, old.counters,
1940 new.freelist, new.counters, 1898 new.freelist, new.counters,
1941 "unfreezing slab")); 1899 "unfreezing slab"));
1942 1900
1943 if (m == M_FREE) { 1901 if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
1944 page->next = discard_page; 1902 page->next = discard_page;
1945 discard_page = page; 1903 discard_page = page;
1904 } else {
1905 add_partial(n, page, DEACTIVATE_TO_TAIL);
1906 stat(s, FREE_ADD_PARTIAL);
1946 } 1907 }
1947 } 1908 }
1948 1909
@@ -2011,7 +1972,11 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2011static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1972static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2012{ 1973{
2013 stat(s, CPUSLAB_FLUSH); 1974 stat(s, CPUSLAB_FLUSH);
2014 deactivate_slab(s, c); 1975 deactivate_slab(s, c->page, c->freelist);
1976
1977 c->tid = next_tid(c->tid);
1978 c->page = NULL;
1979 c->freelist = NULL;
2015} 1980}
2016 1981
2017/* 1982/*
@@ -2055,10 +2020,10 @@ static void flush_all(struct kmem_cache *s)
2055 * Check if the objects in a per cpu structure fit numa 2020 * Check if the objects in a per cpu structure fit numa
2056 * locality expectations. 2021 * locality expectations.
2057 */ 2022 */
2058static inline int node_match(struct kmem_cache_cpu *c, int node) 2023static inline int node_match(struct page *page, int node)
2059{ 2024{
2060#ifdef CONFIG_NUMA 2025#ifdef CONFIG_NUMA
2061 if (node != NUMA_NO_NODE && c->node != node) 2026 if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2062 return 0; 2027 return 0;
2063#endif 2028#endif
2064 return 1; 2029 return 1;
@@ -2101,10 +2066,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2101 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", 2066 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2102 nid, gfpflags); 2067 nid, gfpflags);
2103 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " 2068 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2104 "default order: %d, min order: %d\n", s->name, s->objsize, 2069 "default order: %d, min order: %d\n", s->name, s->object_size,
2105 s->size, oo_order(s->oo), oo_order(s->min)); 2070 s->size, oo_order(s->oo), oo_order(s->min));
2106 2071
2107 if (oo_order(s->min) > get_order(s->objsize)) 2072 if (oo_order(s->min) > get_order(s->object_size))
2108 printk(KERN_WARNING " %s debugging increased min order, use " 2073 printk(KERN_WARNING " %s debugging increased min order, use "
2109 "slub_debug=O to disable.\n", s->name); 2074 "slub_debug=O to disable.\n", s->name);
2110 2075
@@ -2130,10 +2095,16 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2130static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, 2095static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2131 int node, struct kmem_cache_cpu **pc) 2096 int node, struct kmem_cache_cpu **pc)
2132{ 2097{
2133 void *object; 2098 void *freelist;
2134 struct kmem_cache_cpu *c; 2099 struct kmem_cache_cpu *c = *pc;
2135 struct page *page = new_slab(s, flags, node); 2100 struct page *page;
2101
2102 freelist = get_partial(s, flags, node, c);
2136 2103
2104 if (freelist)
2105 return freelist;
2106
2107 page = new_slab(s, flags, node);
2137 if (page) { 2108 if (page) {
2138 c = __this_cpu_ptr(s->cpu_slab); 2109 c = __this_cpu_ptr(s->cpu_slab);
2139 if (c->page) 2110 if (c->page)
@@ -2143,17 +2114,16 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2143 * No other reference to the page yet so we can 2114 * No other reference to the page yet so we can
2144 * muck around with it freely without cmpxchg 2115 * muck around with it freely without cmpxchg
2145 */ 2116 */
2146 object = page->freelist; 2117 freelist = page->freelist;
2147 page->freelist = NULL; 2118 page->freelist = NULL;
2148 2119
2149 stat(s, ALLOC_SLAB); 2120 stat(s, ALLOC_SLAB);
2150 c->node = page_to_nid(page);
2151 c->page = page; 2121 c->page = page;
2152 *pc = c; 2122 *pc = c;
2153 } else 2123 } else
2154 object = NULL; 2124 freelist = NULL;
2155 2125
2156 return object; 2126 return freelist;
2157} 2127}
2158 2128
2159/* 2129/*
@@ -2163,6 +2133,8 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2163 * The page is still frozen if the return value is not NULL. 2133 * The page is still frozen if the return value is not NULL.
2164 * 2134 *
2165 * If this function returns NULL then the page has been unfrozen. 2135 * If this function returns NULL then the page has been unfrozen.
2136 *
2137 * This function must be called with interrupt disabled.
2166 */ 2138 */
2167static inline void *get_freelist(struct kmem_cache *s, struct page *page) 2139static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2168{ 2140{
@@ -2173,13 +2145,14 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2173 do { 2145 do {
2174 freelist = page->freelist; 2146 freelist = page->freelist;
2175 counters = page->counters; 2147 counters = page->counters;
2148
2176 new.counters = counters; 2149 new.counters = counters;
2177 VM_BUG_ON(!new.frozen); 2150 VM_BUG_ON(!new.frozen);
2178 2151
2179 new.inuse = page->objects; 2152 new.inuse = page->objects;
2180 new.frozen = freelist != NULL; 2153 new.frozen = freelist != NULL;
2181 2154
2182 } while (!cmpxchg_double_slab(s, page, 2155 } while (!__cmpxchg_double_slab(s, page,
2183 freelist, counters, 2156 freelist, counters,
2184 NULL, new.counters, 2157 NULL, new.counters,
2185 "get_freelist")); 2158 "get_freelist"));
@@ -2206,7 +2179,8 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2206static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, 2179static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2207 unsigned long addr, struct kmem_cache_cpu *c) 2180 unsigned long addr, struct kmem_cache_cpu *c)
2208{ 2181{
2209 void **object; 2182 void *freelist;
2183 struct page *page;
2210 unsigned long flags; 2184 unsigned long flags;
2211 2185
2212 local_irq_save(flags); 2186 local_irq_save(flags);
@@ -2219,25 +2193,29 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2219 c = this_cpu_ptr(s->cpu_slab); 2193 c = this_cpu_ptr(s->cpu_slab);
2220#endif 2194#endif
2221 2195
2222 if (!c->page) 2196 page = c->page;
2197 if (!page)
2223 goto new_slab; 2198 goto new_slab;
2224redo: 2199redo:
2225 if (unlikely(!node_match(c, node))) { 2200
2201 if (unlikely(!node_match(page, node))) {
2226 stat(s, ALLOC_NODE_MISMATCH); 2202 stat(s, ALLOC_NODE_MISMATCH);
2227 deactivate_slab(s, c); 2203 deactivate_slab(s, page, c->freelist);
2204 c->page = NULL;
2205 c->freelist = NULL;
2228 goto new_slab; 2206 goto new_slab;
2229 } 2207 }
2230 2208
2231 /* must check again c->freelist in case of cpu migration or IRQ */ 2209 /* must check again c->freelist in case of cpu migration or IRQ */
2232 object = c->freelist; 2210 freelist = c->freelist;
2233 if (object) 2211 if (freelist)
2234 goto load_freelist; 2212 goto load_freelist;
2235 2213
2236 stat(s, ALLOC_SLOWPATH); 2214 stat(s, ALLOC_SLOWPATH);
2237 2215
2238 object = get_freelist(s, c->page); 2216 freelist = get_freelist(s, page);
2239 2217
2240 if (!object) { 2218 if (!freelist) {
2241 c->page = NULL; 2219 c->page = NULL;
2242 stat(s, DEACTIVATE_BYPASS); 2220 stat(s, DEACTIVATE_BYPASS);
2243 goto new_slab; 2221 goto new_slab;
@@ -2246,50 +2224,50 @@ redo:
2246 stat(s, ALLOC_REFILL); 2224 stat(s, ALLOC_REFILL);
2247 2225
2248load_freelist: 2226load_freelist:
2249 c->freelist = get_freepointer(s, object); 2227 /*
2228 * freelist is pointing to the list of objects to be used.
2229 * page is pointing to the page from which the objects are obtained.
2230 * That page must be frozen for per cpu allocations to work.
2231 */
2232 VM_BUG_ON(!c->page->frozen);
2233 c->freelist = get_freepointer(s, freelist);
2250 c->tid = next_tid(c->tid); 2234 c->tid = next_tid(c->tid);
2251 local_irq_restore(flags); 2235 local_irq_restore(flags);
2252 return object; 2236 return freelist;
2253 2237
2254new_slab: 2238new_slab:
2255 2239
2256 if (c->partial) { 2240 if (c->partial) {
2257 c->page = c->partial; 2241 page = c->page = c->partial;
2258 c->partial = c->page->next; 2242 c->partial = page->next;
2259 c->node = page_to_nid(c->page);
2260 stat(s, CPU_PARTIAL_ALLOC); 2243 stat(s, CPU_PARTIAL_ALLOC);
2261 c->freelist = NULL; 2244 c->freelist = NULL;
2262 goto redo; 2245 goto redo;
2263 } 2246 }
2264 2247
2265 /* Then do expensive stuff like retrieving pages from the partial lists */ 2248 freelist = new_slab_objects(s, gfpflags, node, &c);
2266 object = get_partial(s, gfpflags, node, c);
2267 2249
2268 if (unlikely(!object)) { 2250 if (unlikely(!freelist)) {
2251 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2252 slab_out_of_memory(s, gfpflags, node);
2269 2253
2270 object = new_slab_objects(s, gfpflags, node, &c); 2254 local_irq_restore(flags);
2271 2255 return NULL;
2272 if (unlikely(!object)) {
2273 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2274 slab_out_of_memory(s, gfpflags, node);
2275
2276 local_irq_restore(flags);
2277 return NULL;
2278 }
2279 } 2256 }
2280 2257
2258 page = c->page;
2281 if (likely(!kmem_cache_debug(s))) 2259 if (likely(!kmem_cache_debug(s)))
2282 goto load_freelist; 2260 goto load_freelist;
2283 2261
2284 /* Only entered in the debug case */ 2262 /* Only entered in the debug case */
2285 if (!alloc_debug_processing(s, c->page, object, addr)) 2263 if (!alloc_debug_processing(s, page, freelist, addr))
2286 goto new_slab; /* Slab failed checks. Next slab needed */ 2264 goto new_slab; /* Slab failed checks. Next slab needed */
2287 2265
2288 c->freelist = get_freepointer(s, object); 2266 deactivate_slab(s, page, get_freepointer(s, freelist));
2289 deactivate_slab(s, c); 2267 c->page = NULL;
2290 c->node = NUMA_NO_NODE; 2268 c->freelist = NULL;
2291 local_irq_restore(flags); 2269 local_irq_restore(flags);
2292 return object; 2270 return freelist;
2293} 2271}
2294 2272
2295/* 2273/*
@@ -2307,6 +2285,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
2307{ 2285{
2308 void **object; 2286 void **object;
2309 struct kmem_cache_cpu *c; 2287 struct kmem_cache_cpu *c;
2288 struct page *page;
2310 unsigned long tid; 2289 unsigned long tid;
2311 2290
2312 if (slab_pre_alloc_hook(s, gfpflags)) 2291 if (slab_pre_alloc_hook(s, gfpflags))
@@ -2332,7 +2311,8 @@ redo:
2332 barrier(); 2311 barrier();
2333 2312
2334 object = c->freelist; 2313 object = c->freelist;
2335 if (unlikely(!object || !node_match(c, node))) 2314 page = c->page;
2315 if (unlikely(!object || !node_match(page, node)))
2336 2316
2337 object = __slab_alloc(s, gfpflags, node, addr, c); 2317 object = __slab_alloc(s, gfpflags, node, addr, c);
2338 2318
@@ -2364,7 +2344,7 @@ redo:
2364 } 2344 }
2365 2345
2366 if (unlikely(gfpflags & __GFP_ZERO) && object) 2346 if (unlikely(gfpflags & __GFP_ZERO) && object)
2367 memset(object, 0, s->objsize); 2347 memset(object, 0, s->object_size);
2368 2348
2369 slab_post_alloc_hook(s, gfpflags, object); 2349 slab_post_alloc_hook(s, gfpflags, object);
2370 2350
@@ -2375,7 +2355,7 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2375{ 2355{
2376 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); 2356 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2377 2357
2378 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); 2358 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags);
2379 2359
2380 return ret; 2360 return ret;
2381} 2361}
@@ -2405,7 +2385,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2405 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); 2385 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2406 2386
2407 trace_kmem_cache_alloc_node(_RET_IP_, ret, 2387 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2408 s->objsize, s->size, gfpflags, node); 2388 s->object_size, s->size, gfpflags, node);
2409 2389
2410 return ret; 2390 return ret;
2411} 2391}
@@ -2900,7 +2880,7 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min)
2900static int calculate_sizes(struct kmem_cache *s, int forced_order) 2880static int calculate_sizes(struct kmem_cache *s, int forced_order)
2901{ 2881{
2902 unsigned long flags = s->flags; 2882 unsigned long flags = s->flags;
2903 unsigned long size = s->objsize; 2883 unsigned long size = s->object_size;
2904 unsigned long align = s->align; 2884 unsigned long align = s->align;
2905 int order; 2885 int order;
2906 2886
@@ -2929,7 +2909,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
2929 * end of the object and the free pointer. If not then add an 2909 * end of the object and the free pointer. If not then add an
2930 * additional word to have some bytes to store Redzone information. 2910 * additional word to have some bytes to store Redzone information.
2931 */ 2911 */
2932 if ((flags & SLAB_RED_ZONE) && size == s->objsize) 2912 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
2933 size += sizeof(void *); 2913 size += sizeof(void *);
2934#endif 2914#endif
2935 2915
@@ -2977,7 +2957,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
2977 * user specified and the dynamic determination of cache line size 2957 * user specified and the dynamic determination of cache line size
2978 * on bootup. 2958 * on bootup.
2979 */ 2959 */
2980 align = calculate_alignment(flags, align, s->objsize); 2960 align = calculate_alignment(flags, align, s->object_size);
2981 s->align = align; 2961 s->align = align;
2982 2962
2983 /* 2963 /*
@@ -3025,7 +3005,7 @@ static int kmem_cache_open(struct kmem_cache *s,
3025 memset(s, 0, kmem_size); 3005 memset(s, 0, kmem_size);
3026 s->name = name; 3006 s->name = name;
3027 s->ctor = ctor; 3007 s->ctor = ctor;
3028 s->objsize = size; 3008 s->object_size = size;
3029 s->align = align; 3009 s->align = align;
3030 s->flags = kmem_cache_flags(size, flags, name, ctor); 3010 s->flags = kmem_cache_flags(size, flags, name, ctor);
3031 s->reserved = 0; 3011 s->reserved = 0;
@@ -3040,7 +3020,7 @@ static int kmem_cache_open(struct kmem_cache *s,
3040 * Disable debugging flags that store metadata if the min slab 3020 * Disable debugging flags that store metadata if the min slab
3041 * order increased. 3021 * order increased.
3042 */ 3022 */
3043 if (get_order(s->size) > get_order(s->objsize)) { 3023 if (get_order(s->size) > get_order(s->object_size)) {
3044 s->flags &= ~DEBUG_METADATA_FLAGS; 3024 s->flags &= ~DEBUG_METADATA_FLAGS;
3045 s->offset = 0; 3025 s->offset = 0;
3046 if (!calculate_sizes(s, -1)) 3026 if (!calculate_sizes(s, -1))
@@ -3114,7 +3094,7 @@ error:
3114 */ 3094 */
3115unsigned int kmem_cache_size(struct kmem_cache *s) 3095unsigned int kmem_cache_size(struct kmem_cache *s)
3116{ 3096{
3117 return s->objsize; 3097 return s->object_size;
3118} 3098}
3119EXPORT_SYMBOL(kmem_cache_size); 3099EXPORT_SYMBOL(kmem_cache_size);
3120 3100
@@ -3192,11 +3172,11 @@ static inline int kmem_cache_close(struct kmem_cache *s)
3192 */ 3172 */
3193void kmem_cache_destroy(struct kmem_cache *s) 3173void kmem_cache_destroy(struct kmem_cache *s)
3194{ 3174{
3195 down_write(&slub_lock); 3175 mutex_lock(&slab_mutex);
3196 s->refcount--; 3176 s->refcount--;
3197 if (!s->refcount) { 3177 if (!s->refcount) {
3198 list_del(&s->list); 3178 list_del(&s->list);
3199 up_write(&slub_lock); 3179 mutex_unlock(&slab_mutex);
3200 if (kmem_cache_close(s)) { 3180 if (kmem_cache_close(s)) {
3201 printk(KERN_ERR "SLUB %s: %s called for cache that " 3181 printk(KERN_ERR "SLUB %s: %s called for cache that "
3202 "still has objects.\n", s->name, __func__); 3182 "still has objects.\n", s->name, __func__);
@@ -3206,7 +3186,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
3206 rcu_barrier(); 3186 rcu_barrier();
3207 sysfs_slab_remove(s); 3187 sysfs_slab_remove(s);
3208 } else 3188 } else
3209 up_write(&slub_lock); 3189 mutex_unlock(&slab_mutex);
3210} 3190}
3211EXPORT_SYMBOL(kmem_cache_destroy); 3191EXPORT_SYMBOL(kmem_cache_destroy);
3212 3192
@@ -3268,7 +3248,7 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name,
3268 3248
3269 /* 3249 /*
3270 * This function is called with IRQs disabled during early-boot on 3250 * This function is called with IRQs disabled during early-boot on
3271 * single CPU so there's no need to take slub_lock here. 3251 * single CPU so there's no need to take slab_mutex here.
3272 */ 3252 */
3273 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, 3253 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
3274 flags, NULL)) 3254 flags, NULL))
@@ -3553,10 +3533,10 @@ static int slab_mem_going_offline_callback(void *arg)
3553{ 3533{
3554 struct kmem_cache *s; 3534 struct kmem_cache *s;
3555 3535
3556 down_read(&slub_lock); 3536 mutex_lock(&slab_mutex);
3557 list_for_each_entry(s, &slab_caches, list) 3537 list_for_each_entry(s, &slab_caches, list)
3558 kmem_cache_shrink(s); 3538 kmem_cache_shrink(s);
3559 up_read(&slub_lock); 3539 mutex_unlock(&slab_mutex);
3560 3540
3561 return 0; 3541 return 0;
3562} 3542}
@@ -3577,7 +3557,7 @@ static void slab_mem_offline_callback(void *arg)
3577 if (offline_node < 0) 3557 if (offline_node < 0)
3578 return; 3558 return;
3579 3559
3580 down_read(&slub_lock); 3560 mutex_lock(&slab_mutex);
3581 list_for_each_entry(s, &slab_caches, list) { 3561 list_for_each_entry(s, &slab_caches, list) {
3582 n = get_node(s, offline_node); 3562 n = get_node(s, offline_node);
3583 if (n) { 3563 if (n) {
@@ -3593,7 +3573,7 @@ static void slab_mem_offline_callback(void *arg)
3593 kmem_cache_free(kmem_cache_node, n); 3573 kmem_cache_free(kmem_cache_node, n);
3594 } 3574 }
3595 } 3575 }
3596 up_read(&slub_lock); 3576 mutex_unlock(&slab_mutex);
3597} 3577}
3598 3578
3599static int slab_mem_going_online_callback(void *arg) 3579static int slab_mem_going_online_callback(void *arg)
@@ -3616,7 +3596,7 @@ static int slab_mem_going_online_callback(void *arg)
3616 * allocate a kmem_cache_node structure in order to bring the node 3596 * allocate a kmem_cache_node structure in order to bring the node
3617 * online. 3597 * online.
3618 */ 3598 */
3619 down_read(&slub_lock); 3599 mutex_lock(&slab_mutex);
3620 list_for_each_entry(s, &slab_caches, list) { 3600 list_for_each_entry(s, &slab_caches, list) {
3621 /* 3601 /*
3622 * XXX: kmem_cache_alloc_node will fallback to other nodes 3602 * XXX: kmem_cache_alloc_node will fallback to other nodes
@@ -3632,7 +3612,7 @@ static int slab_mem_going_online_callback(void *arg)
3632 s->node[nid] = n; 3612 s->node[nid] = n;
3633 } 3613 }
3634out: 3614out:
3635 up_read(&slub_lock); 3615 mutex_unlock(&slab_mutex);
3636 return ret; 3616 return ret;
3637} 3617}
3638 3618
@@ -3843,11 +3823,11 @@ void __init kmem_cache_init(void)
3843 3823
3844 if (s && s->size) { 3824 if (s && s->size) {
3845 char *name = kasprintf(GFP_NOWAIT, 3825 char *name = kasprintf(GFP_NOWAIT,
3846 "dma-kmalloc-%d", s->objsize); 3826 "dma-kmalloc-%d", s->object_size);
3847 3827
3848 BUG_ON(!name); 3828 BUG_ON(!name);
3849 kmalloc_dma_caches[i] = create_kmalloc_cache(name, 3829 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3850 s->objsize, SLAB_CACHE_DMA); 3830 s->object_size, SLAB_CACHE_DMA);
3851 } 3831 }
3852 } 3832 }
3853#endif 3833#endif
@@ -3924,16 +3904,12 @@ static struct kmem_cache *find_mergeable(size_t size,
3924 return NULL; 3904 return NULL;
3925} 3905}
3926 3906
3927struct kmem_cache *kmem_cache_create(const char *name, size_t size, 3907struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
3928 size_t align, unsigned long flags, void (*ctor)(void *)) 3908 size_t align, unsigned long flags, void (*ctor)(void *))
3929{ 3909{
3930 struct kmem_cache *s; 3910 struct kmem_cache *s;
3931 char *n; 3911 char *n;
3932 3912
3933 if (WARN_ON(!name))
3934 return NULL;
3935
3936 down_write(&slub_lock);
3937 s = find_mergeable(size, align, flags, name, ctor); 3913 s = find_mergeable(size, align, flags, name, ctor);
3938 if (s) { 3914 if (s) {
3939 s->refcount++; 3915 s->refcount++;
@@ -3941,49 +3917,42 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3941 * Adjust the object sizes so that we clear 3917 * Adjust the object sizes so that we clear
3942 * the complete object on kzalloc. 3918 * the complete object on kzalloc.
3943 */ 3919 */
3944 s->objsize = max(s->objsize, (int)size); 3920 s->object_size = max(s->object_size, (int)size);
3945 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3921 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3946 3922
3947 if (sysfs_slab_alias(s, name)) { 3923 if (sysfs_slab_alias(s, name)) {
3948 s->refcount--; 3924 s->refcount--;
3949 goto err; 3925 return NULL;
3950 } 3926 }
3951 up_write(&slub_lock);
3952 return s; 3927 return s;
3953 } 3928 }
3954 3929
3955 n = kstrdup(name, GFP_KERNEL); 3930 n = kstrdup(name, GFP_KERNEL);
3956 if (!n) 3931 if (!n)
3957 goto err; 3932 return NULL;
3958 3933
3959 s = kmalloc(kmem_size, GFP_KERNEL); 3934 s = kmalloc(kmem_size, GFP_KERNEL);
3960 if (s) { 3935 if (s) {
3961 if (kmem_cache_open(s, n, 3936 if (kmem_cache_open(s, n,
3962 size, align, flags, ctor)) { 3937 size, align, flags, ctor)) {
3938 int r;
3939
3963 list_add(&s->list, &slab_caches); 3940 list_add(&s->list, &slab_caches);
3964 up_write(&slub_lock); 3941 mutex_unlock(&slab_mutex);
3965 if (sysfs_slab_add(s)) { 3942 r = sysfs_slab_add(s);
3966 down_write(&slub_lock); 3943 mutex_lock(&slab_mutex);
3967 list_del(&s->list); 3944
3968 kfree(n); 3945 if (!r)
3969 kfree(s); 3946 return s;
3970 goto err; 3947
3971 } 3948 list_del(&s->list);
3972 return s; 3949 kmem_cache_close(s);
3973 } 3950 }
3974 kfree(s); 3951 kfree(s);
3975 } 3952 }
3976 kfree(n); 3953 kfree(n);
3977err: 3954 return NULL;
3978 up_write(&slub_lock);
3979
3980 if (flags & SLAB_PANIC)
3981 panic("Cannot create slabcache %s\n", name);
3982 else
3983 s = NULL;
3984 return s;
3985} 3955}
3986EXPORT_SYMBOL(kmem_cache_create);
3987 3956
3988#ifdef CONFIG_SMP 3957#ifdef CONFIG_SMP
3989/* 3958/*
@@ -4002,13 +3971,13 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
4002 case CPU_UP_CANCELED_FROZEN: 3971 case CPU_UP_CANCELED_FROZEN:
4003 case CPU_DEAD: 3972 case CPU_DEAD:
4004 case CPU_DEAD_FROZEN: 3973 case CPU_DEAD_FROZEN:
4005 down_read(&slub_lock); 3974 mutex_lock(&slab_mutex);
4006 list_for_each_entry(s, &slab_caches, list) { 3975 list_for_each_entry(s, &slab_caches, list) {
4007 local_irq_save(flags); 3976 local_irq_save(flags);
4008 __flush_cpu_slab(s, cpu); 3977 __flush_cpu_slab(s, cpu);
4009 local_irq_restore(flags); 3978 local_irq_restore(flags);
4010 } 3979 }
4011 up_read(&slub_lock); 3980 mutex_unlock(&slab_mutex);
4012 break; 3981 break;
4013 default: 3982 default:
4014 break; 3983 break;
@@ -4500,30 +4469,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4500 4469
4501 for_each_possible_cpu(cpu) { 4470 for_each_possible_cpu(cpu) {
4502 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 4471 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4503 int node = ACCESS_ONCE(c->node); 4472 int node;
4504 struct page *page; 4473 struct page *page;
4505 4474
4506 if (node < 0)
4507 continue;
4508 page = ACCESS_ONCE(c->page); 4475 page = ACCESS_ONCE(c->page);
4509 if (page) { 4476 if (!page)
4510 if (flags & SO_TOTAL) 4477 continue;
4511 x = page->objects;
4512 else if (flags & SO_OBJECTS)
4513 x = page->inuse;
4514 else
4515 x = 1;
4516 4478
4517 total += x; 4479 node = page_to_nid(page);
4518 nodes[node] += x; 4480 if (flags & SO_TOTAL)
4519 } 4481 x = page->objects;
4520 page = c->partial; 4482 else if (flags & SO_OBJECTS)
4483 x = page->inuse;
4484 else
4485 x = 1;
4521 4486
4487 total += x;
4488 nodes[node] += x;
4489
4490 page = ACCESS_ONCE(c->partial);
4522 if (page) { 4491 if (page) {
4523 x = page->pobjects; 4492 x = page->pobjects;
4524 total += x; 4493 total += x;
4525 nodes[node] += x; 4494 nodes[node] += x;
4526 } 4495 }
4496
4527 per_cpu[node]++; 4497 per_cpu[node]++;
4528 } 4498 }
4529 } 4499 }
@@ -4623,7 +4593,7 @@ SLAB_ATTR_RO(align);
4623 4593
4624static ssize_t object_size_show(struct kmem_cache *s, char *buf) 4594static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4625{ 4595{
4626 return sprintf(buf, "%d\n", s->objsize); 4596 return sprintf(buf, "%d\n", s->object_size);
4627} 4597}
4628SLAB_ATTR_RO(object_size); 4598SLAB_ATTR_RO(object_size);
4629 4599
@@ -5286,7 +5256,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
5286 const char *name; 5256 const char *name;
5287 int unmergeable; 5257 int unmergeable;
5288 5258
5289 if (slab_state < SYSFS) 5259 if (slab_state < FULL)
5290 /* Defer until later */ 5260 /* Defer until later */
5291 return 0; 5261 return 0;
5292 5262
@@ -5331,7 +5301,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
5331 5301
5332static void sysfs_slab_remove(struct kmem_cache *s) 5302static void sysfs_slab_remove(struct kmem_cache *s)
5333{ 5303{
5334 if (slab_state < SYSFS) 5304 if (slab_state < FULL)
5335 /* 5305 /*
5336 * Sysfs has not been setup yet so no need to remove the 5306 * Sysfs has not been setup yet so no need to remove the
5337 * cache from sysfs. 5307 * cache from sysfs.
@@ -5359,7 +5329,7 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5359{ 5329{
5360 struct saved_alias *al; 5330 struct saved_alias *al;
5361 5331
5362 if (slab_state == SYSFS) { 5332 if (slab_state == FULL) {
5363 /* 5333 /*
5364 * If we have a leftover link then remove it. 5334 * If we have a leftover link then remove it.
5365 */ 5335 */
@@ -5383,16 +5353,16 @@ static int __init slab_sysfs_init(void)
5383 struct kmem_cache *s; 5353 struct kmem_cache *s;
5384 int err; 5354 int err;
5385 5355
5386 down_write(&slub_lock); 5356 mutex_lock(&slab_mutex);
5387 5357
5388 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); 5358 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5389 if (!slab_kset) { 5359 if (!slab_kset) {
5390 up_write(&slub_lock); 5360 mutex_unlock(&slab_mutex);
5391 printk(KERN_ERR "Cannot register slab subsystem.\n"); 5361 printk(KERN_ERR "Cannot register slab subsystem.\n");
5392 return -ENOSYS; 5362 return -ENOSYS;
5393 } 5363 }
5394 5364
5395 slab_state = SYSFS; 5365 slab_state = FULL;
5396 5366
5397 list_for_each_entry(s, &slab_caches, list) { 5367 list_for_each_entry(s, &slab_caches, list) {
5398 err = sysfs_slab_add(s); 5368 err = sysfs_slab_add(s);
@@ -5408,11 +5378,11 @@ static int __init slab_sysfs_init(void)
5408 err = sysfs_slab_alias(al->s, al->name); 5378 err = sysfs_slab_alias(al->s, al->name);
5409 if (err) 5379 if (err)
5410 printk(KERN_ERR "SLUB: Unable to add boot slab alias" 5380 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
5411 " %s to sysfs\n", s->name); 5381 " %s to sysfs\n", al->name);
5412 kfree(al); 5382 kfree(al);
5413 } 5383 }
5414 5384
5415 up_write(&slub_lock); 5385 mutex_unlock(&slab_mutex);
5416 resiliency_test(); 5386 resiliency_test();
5417 return 0; 5387 return 0;
5418} 5388}
@@ -5427,7 +5397,7 @@ __initcall(slab_sysfs_init);
5427static void print_slabinfo_header(struct seq_file *m) 5397static void print_slabinfo_header(struct seq_file *m)
5428{ 5398{
5429 seq_puts(m, "slabinfo - version: 2.1\n"); 5399 seq_puts(m, "slabinfo - version: 2.1\n");
5430 seq_puts(m, "# name <active_objs> <num_objs> <objsize> " 5400 seq_puts(m, "# name <active_objs> <num_objs> <object_size> "
5431 "<objperslab> <pagesperslab>"); 5401 "<objperslab> <pagesperslab>");
5432 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); 5402 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
5433 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); 5403 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
@@ -5438,7 +5408,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
5438{ 5408{
5439 loff_t n = *pos; 5409 loff_t n = *pos;
5440 5410
5441 down_read(&slub_lock); 5411 mutex_lock(&slab_mutex);
5442 if (!n) 5412 if (!n)
5443 print_slabinfo_header(m); 5413 print_slabinfo_header(m);
5444 5414
@@ -5452,7 +5422,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos)
5452 5422
5453static void s_stop(struct seq_file *m, void *p) 5423static void s_stop(struct seq_file *m, void *p)
5454{ 5424{
5455 up_read(&slub_lock); 5425 mutex_unlock(&slab_mutex);
5456} 5426}
5457 5427
5458static int s_show(struct seq_file *m, void *p) 5428static int s_show(struct seq_file *m, void *p)