Diffstat (limited to 'mm/slub.c')
-rw-r--r--  mm/slub.c  464
1 file changed, 229 insertions(+), 235 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 8c691fa1cf3c..8f78e2577031 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,6 +16,7 @@
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include "slab.h"
19#include <linux/proc_fs.h> 20#include <linux/proc_fs.h>
20#include <linux/seq_file.h> 21#include <linux/seq_file.h>
21#include <linux/kmemcheck.h> 22#include <linux/kmemcheck.h>
@@ -33,15 +34,17 @@
33 34
34#include <trace/events/kmem.h> 35#include <trace/events/kmem.h>
35 36
37#include "internal.h"
38
36/* 39/*
37 * Lock order: 40 * Lock order:
38 * 1. slub_lock (Global Semaphore) 41 * 1. slab_mutex (Global Mutex)
39 * 2. node->list_lock 42 * 2. node->list_lock
40 * 3. slab_lock(page) (Only on some arches and for debugging) 43 * 3. slab_lock(page) (Only on some arches and for debugging)
41 * 44 *
42 * slub_lock 45 * slab_mutex
43 * 46 *
44 * The role of the slub_lock is to protect the list of all the slabs 47 * The role of the slab_mutex is to protect the list of all the slabs
45 * and to synchronize major metadata changes to slab cache structures. 48 * and to synchronize major metadata changes to slab cache structures.
46 * 49 *
47 * The slab_lock is only used for debugging and on arches that do not 50 * The slab_lock is only used for debugging and on arches that do not
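As a side note on the lock hierarchy documented above, here is a minimal userspace sketch of taking the three levels in one fixed order, which is what prevents deadlock. The pthread mutexes and names below are invented stand-ins for slab_mutex, node->list_lock and slab_lock; this is not SLUB code.

/* Illustrative only: always acquire in hierarchy order, release in reverse. */
#include <pthread.h>

static pthread_mutex_t slab_mutex_sketch = PTHREAD_MUTEX_INITIALIZER; /* level 1 */
static pthread_mutex_t list_lock_sketch  = PTHREAD_MUTEX_INITIALIZER; /* level 2 */
static pthread_mutex_t slab_lock_sketch  = PTHREAD_MUTEX_INITIALIZER; /* level 3 */

static void touch_all_levels(void)
{
        pthread_mutex_lock(&slab_mutex_sketch);
        pthread_mutex_lock(&list_lock_sketch);
        pthread_mutex_lock(&slab_lock_sketch);

        /* ... metadata work would happen here ... */

        pthread_mutex_unlock(&slab_lock_sketch);
        pthread_mutex_unlock(&list_lock_sketch);
        pthread_mutex_unlock(&slab_mutex_sketch);
}

int main(void)
{
        touch_all_levels();
        return 0;
}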
@@ -182,17 +185,6 @@ static int kmem_size = sizeof(struct kmem_cache);
182static struct notifier_block slab_notifier; 185static struct notifier_block slab_notifier;
183#endif 186#endif
184 187
185static enum {
186 DOWN, /* No slab functionality available */
187 PARTIAL, /* Kmem_cache_node works */
188 UP, /* Everything works but does not show up in sysfs */
189 SYSFS /* Sysfs up */
190} slab_state = DOWN;
191
192/* A list of all slab caches on the system */
193static DECLARE_RWSEM(slub_lock);
194static LIST_HEAD(slab_caches);
195
196/* 188/*
197 * Tracking user of a slab. 189 * Tracking user of a slab.
198 */ 190 */
@@ -237,11 +229,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
237 * Core slab cache functions 229 * Core slab cache functions
238 *******************************************************************/ 230 *******************************************************************/
239 231
240int slab_is_available(void)
241{
242 return slab_state >= UP;
243}
244
245static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) 232static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
246{ 233{
247 return s->node[node]; 234 return s->node[node];
@@ -311,7 +298,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
311 * and whatever may come after it. 298 * and whatever may come after it.
312 */ 299 */
313 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) 300 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
314 return s->objsize; 301 return s->object_size;
315 302
316#endif 303#endif
317 /* 304 /*
@@ -609,11 +596,11 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
609 if (p > addr + 16) 596 if (p > addr + 16)
610 print_section("Bytes b4 ", p - 16, 16); 597 print_section("Bytes b4 ", p - 16, 16);
611 598
612 print_section("Object ", p, min_t(unsigned long, s->objsize, 599 print_section("Object ", p, min_t(unsigned long, s->object_size,
613 PAGE_SIZE)); 600 PAGE_SIZE));
614 if (s->flags & SLAB_RED_ZONE) 601 if (s->flags & SLAB_RED_ZONE)
615 print_section("Redzone ", p + s->objsize, 602 print_section("Redzone ", p + s->object_size,
616 s->inuse - s->objsize); 603 s->inuse - s->object_size);
617 604
618 if (s->offset) 605 if (s->offset)
619 off = s->offset + sizeof(void *); 606 off = s->offset + sizeof(void *);
@@ -655,12 +642,12 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
655 u8 *p = object; 642 u8 *p = object;
656 643
657 if (s->flags & __OBJECT_POISON) { 644 if (s->flags & __OBJECT_POISON) {
658 memset(p, POISON_FREE, s->objsize - 1); 645 memset(p, POISON_FREE, s->object_size - 1);
659 p[s->objsize - 1] = POISON_END; 646 p[s->object_size - 1] = POISON_END;
660 } 647 }
661 648
662 if (s->flags & SLAB_RED_ZONE) 649 if (s->flags & SLAB_RED_ZONE)
663 memset(p + s->objsize, val, s->inuse - s->objsize); 650 memset(p + s->object_size, val, s->inuse - s->object_size);
664} 651}
665 652
666static void restore_bytes(struct kmem_cache *s, char *message, u8 data, 653static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
@@ -705,10 +692,10 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
705 * Poisoning uses 0x6b (POISON_FREE) and the last byte is 692 * Poisoning uses 0x6b (POISON_FREE) and the last byte is
706 * 0xa5 (POISON_END) 693 * 0xa5 (POISON_END)
707 * 694 *
708 * object + s->objsize 695 * object + s->object_size
709 * Padding to reach word boundary. This is also used for Redzoning. 696 * Padding to reach word boundary. This is also used for Redzoning.
710 * Padding is extended by another word if Redzoning is enabled and 697 * Padding is extended by another word if Redzoning is enabled and
711 * objsize == inuse. 698 * object_size == inuse.
712 * 699 *
713 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with 700 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with
714 * 0xcc (RED_ACTIVE) for objects in use. 701 * 0xcc (RED_ACTIVE) for objects in use.
@@ -727,7 +714,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
727 * object + s->size 714 * object + s->size
728 * Nothing is used beyond s->size. 715 * Nothing is used beyond s->size.
729 * 716 *
730 * If slabcaches are merged then the objsize and inuse boundaries are mostly 717 * If slabcaches are merged then the object_size and inuse boundaries are mostly
731 * ignored. And therefore no slab options that rely on these boundaries 718 * ignored. And therefore no slab options that rely on these boundaries
732 * may be used with merged slabcaches. 719 * may be used with merged slabcaches.
733 */ 720 */
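The layout described in the comment above (object bytes poisoned with 0x6b and terminated with 0xa5, then a red zone filling the gap between object_size and inuse) can be modelled outside the kernel. A minimal standalone C sketch follows; the helper names are invented for the illustration and only the byte values come from the comment.

/* Illustrative only: models init_object()/check_object() behaviour
 * for a free object, not the kernel implementation. */
#include <stdio.h>
#include <string.h>

#define POISON_FREE  0x6b
#define POISON_END   0xa5
#define RED_INACTIVE 0xbb

static void sketch_init_object(unsigned char *p, size_t object_size, size_t inuse)
{
        memset(p, POISON_FREE, object_size - 1);        /* poison the object  */
        p[object_size - 1] = POISON_END;                /* last-byte marker   */
        memset(p + object_size, RED_INACTIVE,           /* red zone padding   */
               inuse - object_size);
}

static int sketch_check_object(const unsigned char *p, size_t object_size, size_t inuse)
{
        size_t i;

        for (i = 0; i < object_size - 1; i++)
                if (p[i] != POISON_FREE)
                        return 0;
        if (p[object_size - 1] != POISON_END)
                return 0;
        for (i = object_size; i < inuse; i++)
                if (p[i] != RED_INACTIVE)
                        return 0;
        return 1;
}

int main(void)
{
        unsigned char obj[32];

        sketch_init_object(obj, 24, sizeof(obj));
        printf("intact: %d\n", sketch_check_object(obj, 24, sizeof(obj)));
        obj[25] = 0;    /* simulate an overflow into the red zone */
        printf("after overwrite: %d\n", sketch_check_object(obj, 24, sizeof(obj)));
        return 0;
}

Overwriting a byte past object_size trips the red-zone check, which is the class of corruption check_object() reports.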
@@ -787,25 +774,25 @@ static int check_object(struct kmem_cache *s, struct page *page,
787 void *object, u8 val) 774 void *object, u8 val)
788{ 775{
789 u8 *p = object; 776 u8 *p = object;
790 u8 *endobject = object + s->objsize; 777 u8 *endobject = object + s->object_size;
791 778
792 if (s->flags & SLAB_RED_ZONE) { 779 if (s->flags & SLAB_RED_ZONE) {
793 if (!check_bytes_and_report(s, page, object, "Redzone", 780 if (!check_bytes_and_report(s, page, object, "Redzone",
794 endobject, val, s->inuse - s->objsize)) 781 endobject, val, s->inuse - s->object_size))
795 return 0; 782 return 0;
796 } else { 783 } else {
797 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { 784 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
798 check_bytes_and_report(s, page, p, "Alignment padding", 785 check_bytes_and_report(s, page, p, "Alignment padding",
799 endobject, POISON_INUSE, s->inuse - s->objsize); 786 endobject, POISON_INUSE, s->inuse - s->object_size);
800 } 787 }
801 } 788 }
802 789
803 if (s->flags & SLAB_POISON) { 790 if (s->flags & SLAB_POISON) {
804 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && 791 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
805 (!check_bytes_and_report(s, page, p, "Poison", p, 792 (!check_bytes_and_report(s, page, p, "Poison", p,
806 POISON_FREE, s->objsize - 1) || 793 POISON_FREE, s->object_size - 1) ||
807 !check_bytes_and_report(s, page, p, "Poison", 794 !check_bytes_and_report(s, page, p, "Poison",
808 p + s->objsize - 1, POISON_END, 1))) 795 p + s->object_size - 1, POISON_END, 1)))
809 return 0; 796 return 0;
810 /* 797 /*
811 * check_pad_bytes cleans up on its own. 798 * check_pad_bytes cleans up on its own.
@@ -926,7 +913,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
926 page->freelist); 913 page->freelist);
927 914
928 if (!alloc) 915 if (!alloc)
929 print_section("Object ", (void *)object, s->objsize); 916 print_section("Object ", (void *)object, s->object_size);
930 917
931 dump_stack(); 918 dump_stack();
932 } 919 }
@@ -942,14 +929,14 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
942 lockdep_trace_alloc(flags); 929 lockdep_trace_alloc(flags);
943 might_sleep_if(flags & __GFP_WAIT); 930 might_sleep_if(flags & __GFP_WAIT);
944 931
945 return should_failslab(s->objsize, flags, s->flags); 932 return should_failslab(s->object_size, flags, s->flags);
946} 933}
947 934
948static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) 935static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
949{ 936{
950 flags &= gfp_allowed_mask; 937 flags &= gfp_allowed_mask;
951 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); 938 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
952 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); 939 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
953} 940}
954 941
955static inline void slab_free_hook(struct kmem_cache *s, void *x) 942static inline void slab_free_hook(struct kmem_cache *s, void *x)
@@ -966,13 +953,13 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
966 unsigned long flags; 953 unsigned long flags;
967 954
968 local_irq_save(flags); 955 local_irq_save(flags);
969 kmemcheck_slab_free(s, x, s->objsize); 956 kmemcheck_slab_free(s, x, s->object_size);
970 debug_check_no_locks_freed(x, s->objsize); 957 debug_check_no_locks_freed(x, s->object_size);
971 local_irq_restore(flags); 958 local_irq_restore(flags);
972 } 959 }
973#endif 960#endif
974 if (!(s->flags & SLAB_DEBUG_OBJECTS)) 961 if (!(s->flags & SLAB_DEBUG_OBJECTS))
975 debug_check_no_obj_freed(x, s->objsize); 962 debug_check_no_obj_freed(x, s->object_size);
976} 963}
977 964
978/* 965/*
@@ -1207,7 +1194,7 @@ out:
1207 1194
1208__setup("slub_debug", setup_slub_debug); 1195__setup("slub_debug", setup_slub_debug);
1209 1196
1210static unsigned long kmem_cache_flags(unsigned long objsize, 1197static unsigned long kmem_cache_flags(unsigned long object_size,
1211 unsigned long flags, const char *name, 1198 unsigned long flags, const char *name,
1212 void (*ctor)(void *)) 1199 void (*ctor)(void *))
1213{ 1200{
@@ -1237,7 +1224,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page,
1237static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, 1224static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1238 struct page *page) {} 1225 struct page *page) {}
1239static inline void remove_full(struct kmem_cache *s, struct page *page) {} 1226static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1240static inline unsigned long kmem_cache_flags(unsigned long objsize, 1227static inline unsigned long kmem_cache_flags(unsigned long object_size,
1241 unsigned long flags, const char *name, 1228 unsigned long flags, const char *name,
1242 void (*ctor)(void *)) 1229 void (*ctor)(void *))
1243{ 1230{
@@ -1314,13 +1301,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1314 stat(s, ORDER_FALLBACK); 1301 stat(s, ORDER_FALLBACK);
1315 } 1302 }
1316 1303
1317 if (flags & __GFP_WAIT) 1304 if (kmemcheck_enabled && page
1318 local_irq_disable();
1319
1320 if (!page)
1321 return NULL;
1322
1323 if (kmemcheck_enabled
1324 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { 1305 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1325 int pages = 1 << oo_order(oo); 1306 int pages = 1 << oo_order(oo);
1326 1307
@@ -1336,6 +1317,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1336 kmemcheck_mark_unallocated_pages(page, pages); 1317 kmemcheck_mark_unallocated_pages(page, pages);
1337 } 1318 }
1338 1319
1320 if (flags & __GFP_WAIT)
1321 local_irq_disable();
1322 if (!page)
1323 return NULL;
1324
1339 page->objects = oo_objects(oo); 1325 page->objects = oo_objects(oo);
1340 mod_zone_page_state(page_zone(page), 1326 mod_zone_page_state(page_zone(page),
1341 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1327 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
@@ -1370,6 +1356,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1370 inc_slabs_node(s, page_to_nid(page), page->objects); 1356 inc_slabs_node(s, page_to_nid(page), page->objects);
1371 page->slab = s; 1357 page->slab = s;
1372 __SetPageSlab(page); 1358 __SetPageSlab(page);
1359 if (page->pfmemalloc)
1360 SetPageSlabPfmemalloc(page);
1373 1361
1374 start = page_address(page); 1362 start = page_address(page);
1375 1363
@@ -1413,6 +1401,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1413 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1401 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1414 -pages); 1402 -pages);
1415 1403
1404 __ClearPageSlabPfmemalloc(page);
1416 __ClearPageSlab(page); 1405 __ClearPageSlab(page);
1417 reset_page_mapcount(page); 1406 reset_page_mapcount(page);
1418 if (current->reclaim_state) 1407 if (current->reclaim_state)
@@ -1490,12 +1479,12 @@ static inline void remove_partial(struct kmem_cache_node *n,
1490} 1479}
1491 1480
1492/* 1481/*
1493 * Lock slab, remove from the partial list and put the object into the 1482 * Remove slab from the partial list, freeze it and
1494 * per cpu freelist. 1483 * return the pointer to the freelist.
1495 * 1484 *
1496 * Returns a list of objects or NULL if it fails. 1485 * Returns a list of objects or NULL if it fails.
1497 * 1486 *
1498 * Must hold list_lock. 1487 * Must hold list_lock since we modify the partial list.
1499 */ 1488 */
1500static inline void *acquire_slab(struct kmem_cache *s, 1489static inline void *acquire_slab(struct kmem_cache *s,
1501 struct kmem_cache_node *n, struct page *page, 1490 struct kmem_cache_node *n, struct page *page,
@@ -1510,26 +1499,27 @@ static inline void *acquire_slab(struct kmem_cache *s,
1510 * The old freelist is the list of objects for the 1499 * The old freelist is the list of objects for the
1511 * per cpu allocation list. 1500 * per cpu allocation list.
1512 */ 1501 */
1513 do { 1502 freelist = page->freelist;
1514 freelist = page->freelist; 1503 counters = page->counters;
1515 counters = page->counters; 1504 new.counters = counters;
1516 new.counters = counters; 1505 if (mode) {
1517 if (mode) { 1506 new.inuse = page->objects;
1518 new.inuse = page->objects; 1507 new.freelist = NULL;
1519 new.freelist = NULL; 1508 } else {
1520 } else { 1509 new.freelist = freelist;
1521 new.freelist = freelist; 1510 }
1522 }
1523 1511
1524 VM_BUG_ON(new.frozen); 1512 VM_BUG_ON(new.frozen);
1525 new.frozen = 1; 1513 new.frozen = 1;
1526 1514
1527 } while (!__cmpxchg_double_slab(s, page, 1515 if (!__cmpxchg_double_slab(s, page,
1528 freelist, counters, 1516 freelist, counters,
1529 new.freelist, new.counters, 1517 new.freelist, new.counters,
1530 "lock and freeze")); 1518 "acquire_slab"))
1519 return NULL;
1531 1520
1532 remove_partial(n, page); 1521 remove_partial(n, page);
1522 WARN_ON(!freelist);
1533 return freelist; 1523 return freelist;
1534} 1524}
1535 1525
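The reworked acquire_slab() above attempts the freeze exactly once under list_lock and returns NULL when the cmpxchg fails, instead of retrying in a loop. A rough single-word stand-in for that pattern, assuming the page's {freelist, counters} pair can be compressed into one atomic word (the kernel's __cmpxchg_double_slab() updates both together, which plain C11 atomics cannot express portably):

/* Illustrative sketch only, not kernel code. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define FROZEN_BIT  ((uint64_t)1 << 63)

static _Atomic uint64_t page_state = 5;          /* 5 objects, not frozen */

/* Try to freeze the page: succeed only if nobody changed it meanwhile. */
static int try_freeze(void)
{
        uint64_t old = atomic_load(&page_state);
        uint64_t new = old | FROZEN_BIT;

        if (old & FROZEN_BIT)
                return 0;                        /* already frozen */

        /* single attempt, like the reworked acquire_slab() */
        return atomic_compare_exchange_strong(&page_state, &old, new);
}

int main(void)
{
        printf("first freeze:  %s\n", try_freeze() ? "ok" : "failed");
        printf("second freeze: %s\n", try_freeze() ? "ok" : "failed");
        return 0;
}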
@@ -1563,7 +1553,6 @@ static void *get_partial_node(struct kmem_cache *s,
1563 1553
1564 if (!object) { 1554 if (!object) {
1565 c->page = page; 1555 c->page = page;
1566 c->node = page_to_nid(page);
1567 stat(s, ALLOC_FROM_PARTIAL); 1556 stat(s, ALLOC_FROM_PARTIAL);
1568 object = t; 1557 object = t;
1569 available = page->objects - page->inuse; 1558 available = page->objects - page->inuse;
@@ -1617,7 +1606,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1617 1606
1618 do { 1607 do {
1619 cpuset_mems_cookie = get_mems_allowed(); 1608 cpuset_mems_cookie = get_mems_allowed();
1620 zonelist = node_zonelist(slab_node(current->mempolicy), flags); 1609 zonelist = node_zonelist(slab_node(), flags);
1621 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 1610 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1622 struct kmem_cache_node *n; 1611 struct kmem_cache_node *n;
1623 1612
@@ -1731,14 +1720,12 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
1731/* 1720/*
1732 * Remove the cpu slab 1721 * Remove the cpu slab
1733 */ 1722 */
1734static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1723static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist)
1735{ 1724{
1736 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; 1725 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1737 struct page *page = c->page;
1738 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1726 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1739 int lock = 0; 1727 int lock = 0;
1740 enum slab_modes l = M_NONE, m = M_NONE; 1728 enum slab_modes l = M_NONE, m = M_NONE;
1741 void *freelist;
1742 void *nextfree; 1729 void *nextfree;
1743 int tail = DEACTIVATE_TO_HEAD; 1730 int tail = DEACTIVATE_TO_HEAD;
1744 struct page new; 1731 struct page new;
@@ -1749,11 +1736,6 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1749 tail = DEACTIVATE_TO_TAIL; 1736 tail = DEACTIVATE_TO_TAIL;
1750 } 1737 }
1751 1738
1752 c->tid = next_tid(c->tid);
1753 c->page = NULL;
1754 freelist = c->freelist;
1755 c->freelist = NULL;
1756
1757 /* 1739 /*
1758 * Stage one: Free all available per cpu objects back 1740 * Stage one: Free all available per cpu objects back
1759 * to the page freelist while it is still frozen. Leave the 1741 * to the page freelist while it is still frozen. Leave the
@@ -1879,21 +1861,31 @@ redo:
1879 } 1861 }
1880} 1862}
1881 1863
1882/* Unfreeze all the cpu partial slabs */ 1864/*
1865 * Unfreeze all the cpu partial slabs.
1866 *
1867 * This function must be called with interrupt disabled.
1868 */
1883static void unfreeze_partials(struct kmem_cache *s) 1869static void unfreeze_partials(struct kmem_cache *s)
1884{ 1870{
1885 struct kmem_cache_node *n = NULL; 1871 struct kmem_cache_node *n = NULL, *n2 = NULL;
1886 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); 1872 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
1887 struct page *page, *discard_page = NULL; 1873 struct page *page, *discard_page = NULL;
1888 1874
1889 while ((page = c->partial)) { 1875 while ((page = c->partial)) {
1890 enum slab_modes { M_PARTIAL, M_FREE };
1891 enum slab_modes l, m;
1892 struct page new; 1876 struct page new;
1893 struct page old; 1877 struct page old;
1894 1878
1895 c->partial = page->next; 1879 c->partial = page->next;
1896 l = M_FREE; 1880
1881 n2 = get_node(s, page_to_nid(page));
1882 if (n != n2) {
1883 if (n)
1884 spin_unlock(&n->list_lock);
1885
1886 n = n2;
1887 spin_lock(&n->list_lock);
1888 }
1897 1889
1898 do { 1890 do {
1899 1891
@@ -1906,43 +1898,17 @@ static void unfreeze_partials(struct kmem_cache *s)
1906 1898
1907 new.frozen = 0; 1899 new.frozen = 0;
1908 1900
1909 if (!new.inuse && (!n || n->nr_partial > s->min_partial)) 1901 } while (!__cmpxchg_double_slab(s, page,
1910 m = M_FREE;
1911 else {
1912 struct kmem_cache_node *n2 = get_node(s,
1913 page_to_nid(page));
1914
1915 m = M_PARTIAL;
1916 if (n != n2) {
1917 if (n)
1918 spin_unlock(&n->list_lock);
1919
1920 n = n2;
1921 spin_lock(&n->list_lock);
1922 }
1923 }
1924
1925 if (l != m) {
1926 if (l == M_PARTIAL) {
1927 remove_partial(n, page);
1928 stat(s, FREE_REMOVE_PARTIAL);
1929 } else {
1930 add_partial(n, page,
1931 DEACTIVATE_TO_TAIL);
1932 stat(s, FREE_ADD_PARTIAL);
1933 }
1934
1935 l = m;
1936 }
1937
1938 } while (!cmpxchg_double_slab(s, page,
1939 old.freelist, old.counters, 1902 old.freelist, old.counters,
1940 new.freelist, new.counters, 1903 new.freelist, new.counters,
1941 "unfreezing slab")); 1904 "unfreezing slab"));
1942 1905
1943 if (m == M_FREE) { 1906 if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
1944 page->next = discard_page; 1907 page->next = discard_page;
1945 discard_page = page; 1908 discard_page = page;
1909 } else {
1910 add_partial(n, page, DEACTIVATE_TO_TAIL);
1911 stat(s, FREE_ADD_PARTIAL);
1946 } 1912 }
1947 } 1913 }
1948 1914
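The rewritten unfreeze_partials() above keeps one node's list_lock held across consecutive pages that belong to the same node and only drops and re-takes the lock when the node changes. A userspace sketch of that locking pattern, with node_lock[] and struct item invented for the illustration:

/* Illustrative only: lock per-node data lazily, re-lock only on node change. */
#include <pthread.h>
#include <stdio.h>

#define NODES 2

static pthread_mutex_t node_lock[NODES] = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};

struct item { int node; };

static void drain_items(struct item *items, int count)
{
        int current = -1;       /* no lock held yet */
        int i;

        for (i = 0; i < count; i++) {
                int node = items[i].node;

                if (node != current) {          /* lock only on node change */
                        if (current >= 0)
                                pthread_mutex_unlock(&node_lock[current]);
                        current = node;
                        pthread_mutex_lock(&node_lock[current]);
                }
                printf("item %d handled under node %d lock\n", i, node);
        }
        if (current >= 0)
                pthread_mutex_unlock(&node_lock[current]);
}

int main(void)
{
        struct item items[] = { {0}, {0}, {1}, {1}, {0} };

        drain_items(items, 5);
        return 0;
}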
@@ -2011,7 +1977,11 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2011static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1977static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2012{ 1978{
2013 stat(s, CPUSLAB_FLUSH); 1979 stat(s, CPUSLAB_FLUSH);
2014 deactivate_slab(s, c); 1980 deactivate_slab(s, c->page, c->freelist);
1981
1982 c->tid = next_tid(c->tid);
1983 c->page = NULL;
1984 c->freelist = NULL;
2015} 1985}
2016 1986
2017/* 1987/*
@@ -2055,10 +2025,10 @@ static void flush_all(struct kmem_cache *s)
2055 * Check if the objects in a per cpu structure fit numa 2025 * Check if the objects in a per cpu structure fit numa
2056 * locality expectations. 2026 * locality expectations.
2057 */ 2027 */
2058static inline int node_match(struct kmem_cache_cpu *c, int node) 2028static inline int node_match(struct page *page, int node)
2059{ 2029{
2060#ifdef CONFIG_NUMA 2030#ifdef CONFIG_NUMA
2061 if (node != NUMA_NO_NODE && c->node != node) 2031 if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2062 return 0; 2032 return 0;
2063#endif 2033#endif
2064 return 1; 2034 return 1;
@@ -2101,10 +2071,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2101 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", 2071 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2102 nid, gfpflags); 2072 nid, gfpflags);
2103 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " 2073 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2104 "default order: %d, min order: %d\n", s->name, s->objsize, 2074 "default order: %d, min order: %d\n", s->name, s->object_size,
2105 s->size, oo_order(s->oo), oo_order(s->min)); 2075 s->size, oo_order(s->oo), oo_order(s->min));
2106 2076
2107 if (oo_order(s->min) > get_order(s->objsize)) 2077 if (oo_order(s->min) > get_order(s->object_size))
2108 printk(KERN_WARNING " %s debugging increased min order, use " 2078 printk(KERN_WARNING " %s debugging increased min order, use "
2109 "slub_debug=O to disable.\n", s->name); 2079 "slub_debug=O to disable.\n", s->name);
2110 2080
@@ -2130,10 +2100,16 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2130static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, 2100static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2131 int node, struct kmem_cache_cpu **pc) 2101 int node, struct kmem_cache_cpu **pc)
2132{ 2102{
2133 void *object; 2103 void *freelist;
2134 struct kmem_cache_cpu *c; 2104 struct kmem_cache_cpu *c = *pc;
2135 struct page *page = new_slab(s, flags, node); 2105 struct page *page;
2136 2106
2107 freelist = get_partial(s, flags, node, c);
2108
2109 if (freelist)
2110 return freelist;
2111
2112 page = new_slab(s, flags, node);
2137 if (page) { 2113 if (page) {
2138 c = __this_cpu_ptr(s->cpu_slab); 2114 c = __this_cpu_ptr(s->cpu_slab);
2139 if (c->page) 2115 if (c->page)
@@ -2143,17 +2119,24 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2143 * No other reference to the page yet so we can 2119 * No other reference to the page yet so we can
2144 * muck around with it freely without cmpxchg 2120 * muck around with it freely without cmpxchg
2145 */ 2121 */
2146 object = page->freelist; 2122 freelist = page->freelist;
2147 page->freelist = NULL; 2123 page->freelist = NULL;
2148 2124
2149 stat(s, ALLOC_SLAB); 2125 stat(s, ALLOC_SLAB);
2150 c->node = page_to_nid(page);
2151 c->page = page; 2126 c->page = page;
2152 *pc = c; 2127 *pc = c;
2153 } else 2128 } else
2154 object = NULL; 2129 freelist = NULL;
2155 2130
2156 return object; 2131 return freelist;
2132}
2133
2134static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2135{
2136 if (unlikely(PageSlabPfmemalloc(page)))
2137 return gfp_pfmemalloc_allowed(gfpflags);
2138
2139 return true;
2157} 2140}
2158 2141
2159/* 2142/*
@@ -2163,6 +2146,8 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2163 * The page is still frozen if the return value is not NULL. 2146 * The page is still frozen if the return value is not NULL.
2164 * 2147 *
2165 * If this function returns NULL then the page has been unfrozen. 2148 * If this function returns NULL then the page has been unfrozen.
2149 *
2150 * This function must be called with interrupt disabled.
2166 */ 2151 */
2167static inline void *get_freelist(struct kmem_cache *s, struct page *page) 2152static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2168{ 2153{
@@ -2173,13 +2158,14 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2173 do { 2158 do {
2174 freelist = page->freelist; 2159 freelist = page->freelist;
2175 counters = page->counters; 2160 counters = page->counters;
2161
2176 new.counters = counters; 2162 new.counters = counters;
2177 VM_BUG_ON(!new.frozen); 2163 VM_BUG_ON(!new.frozen);
2178 2164
2179 new.inuse = page->objects; 2165 new.inuse = page->objects;
2180 new.frozen = freelist != NULL; 2166 new.frozen = freelist != NULL;
2181 2167
2182 } while (!cmpxchg_double_slab(s, page, 2168 } while (!__cmpxchg_double_slab(s, page,
2183 freelist, counters, 2169 freelist, counters,
2184 NULL, new.counters, 2170 NULL, new.counters,
2185 "get_freelist")); 2171 "get_freelist"));
@@ -2206,7 +2192,8 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2206static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, 2192static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2207 unsigned long addr, struct kmem_cache_cpu *c) 2193 unsigned long addr, struct kmem_cache_cpu *c)
2208{ 2194{
2209 void **object; 2195 void *freelist;
2196 struct page *page;
2210 unsigned long flags; 2197 unsigned long flags;
2211 2198
2212 local_irq_save(flags); 2199 local_irq_save(flags);
@@ -2219,25 +2206,41 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2219 c = this_cpu_ptr(s->cpu_slab); 2206 c = this_cpu_ptr(s->cpu_slab);
2220#endif 2207#endif
2221 2208
2222 if (!c->page) 2209 page = c->page;
2210 if (!page)
2223 goto new_slab; 2211 goto new_slab;
2224redo: 2212redo:
2225 if (unlikely(!node_match(c, node))) { 2213
2214 if (unlikely(!node_match(page, node))) {
2226 stat(s, ALLOC_NODE_MISMATCH); 2215 stat(s, ALLOC_NODE_MISMATCH);
2227 deactivate_slab(s, c); 2216 deactivate_slab(s, page, c->freelist);
2217 c->page = NULL;
2218 c->freelist = NULL;
2219 goto new_slab;
2220 }
2221
2222 /*
2223 * By rights, we should be searching for a slab page that was
2224 * PFMEMALLOC but right now, we are losing the pfmemalloc
2225 * information when the page leaves the per-cpu allocator
2226 */
2227 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2228 deactivate_slab(s, page, c->freelist);
2229 c->page = NULL;
2230 c->freelist = NULL;
2228 goto new_slab; 2231 goto new_slab;
2229 } 2232 }
2230 2233
2231 /* must check again c->freelist in case of cpu migration or IRQ */ 2234 /* must check again c->freelist in case of cpu migration or IRQ */
2232 object = c->freelist; 2235 freelist = c->freelist;
2233 if (object) 2236 if (freelist)
2234 goto load_freelist; 2237 goto load_freelist;
2235 2238
2236 stat(s, ALLOC_SLOWPATH); 2239 stat(s, ALLOC_SLOWPATH);
2237 2240
2238 object = get_freelist(s, c->page); 2241 freelist = get_freelist(s, page);
2239 2242
2240 if (!object) { 2243 if (!freelist) {
2241 c->page = NULL; 2244 c->page = NULL;
2242 stat(s, DEACTIVATE_BYPASS); 2245 stat(s, DEACTIVATE_BYPASS);
2243 goto new_slab; 2246 goto new_slab;
@@ -2246,50 +2249,50 @@ redo:
2246 stat(s, ALLOC_REFILL); 2249 stat(s, ALLOC_REFILL);
2247 2250
2248load_freelist: 2251load_freelist:
2249 c->freelist = get_freepointer(s, object); 2252 /*
2253 * freelist is pointing to the list of objects to be used.
2254 * page is pointing to the page from which the objects are obtained.
2255 * That page must be frozen for per cpu allocations to work.
2256 */
2257 VM_BUG_ON(!c->page->frozen);
2258 c->freelist = get_freepointer(s, freelist);
2250 c->tid = next_tid(c->tid); 2259 c->tid = next_tid(c->tid);
2251 local_irq_restore(flags); 2260 local_irq_restore(flags);
2252 return object; 2261 return freelist;
2253 2262
2254new_slab: 2263new_slab:
2255 2264
2256 if (c->partial) { 2265 if (c->partial) {
2257 c->page = c->partial; 2266 page = c->page = c->partial;
2258 c->partial = c->page->next; 2267 c->partial = page->next;
2259 c->node = page_to_nid(c->page);
2260 stat(s, CPU_PARTIAL_ALLOC); 2268 stat(s, CPU_PARTIAL_ALLOC);
2261 c->freelist = NULL; 2269 c->freelist = NULL;
2262 goto redo; 2270 goto redo;
2263 } 2271 }
2264 2272
2265 /* Then do expensive stuff like retrieving pages from the partial lists */ 2273 freelist = new_slab_objects(s, gfpflags, node, &c);
2266 object = get_partial(s, gfpflags, node, c);
2267
2268 if (unlikely(!object)) {
2269 2274
2270 object = new_slab_objects(s, gfpflags, node, &c); 2275 if (unlikely(!freelist)) {
2276 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2277 slab_out_of_memory(s, gfpflags, node);
2271 2278
2272 if (unlikely(!object)) { 2279 local_irq_restore(flags);
2273 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) 2280 return NULL;
2274 slab_out_of_memory(s, gfpflags, node);
2275
2276 local_irq_restore(flags);
2277 return NULL;
2278 }
2279 } 2281 }
2280 2282
2281 if (likely(!kmem_cache_debug(s))) 2283 page = c->page;
2284 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2282 goto load_freelist; 2285 goto load_freelist;
2283 2286
2284 /* Only entered in the debug case */ 2287 /* Only entered in the debug case */
2285 if (!alloc_debug_processing(s, c->page, object, addr)) 2288 if (kmem_cache_debug(s) && !alloc_debug_processing(s, page, freelist, addr))
2286 goto new_slab; /* Slab failed checks. Next slab needed */ 2289 goto new_slab; /* Slab failed checks. Next slab needed */
2287 2290
2288 c->freelist = get_freepointer(s, object); 2291 deactivate_slab(s, page, get_freepointer(s, freelist));
2289 deactivate_slab(s, c); 2292 c->page = NULL;
2290 c->node = NUMA_NO_NODE; 2293 c->freelist = NULL;
2291 local_irq_restore(flags); 2294 local_irq_restore(flags);
2292 return object; 2295 return freelist;
2293} 2296}
2294 2297
2295/* 2298/*
@@ -2307,6 +2310,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
2307{ 2310{
2308 void **object; 2311 void **object;
2309 struct kmem_cache_cpu *c; 2312 struct kmem_cache_cpu *c;
2313 struct page *page;
2310 unsigned long tid; 2314 unsigned long tid;
2311 2315
2312 if (slab_pre_alloc_hook(s, gfpflags)) 2316 if (slab_pre_alloc_hook(s, gfpflags))
@@ -2332,8 +2336,8 @@ redo:
2332 barrier(); 2336 barrier();
2333 2337
2334 object = c->freelist; 2338 object = c->freelist;
2335 if (unlikely(!object || !node_match(c, node))) 2339 page = c->page;
2336 2340 if (unlikely(!object || !node_match(page, node)))
2337 object = __slab_alloc(s, gfpflags, node, addr, c); 2341 object = __slab_alloc(s, gfpflags, node, addr, c);
2338 2342
2339 else { 2343 else {
@@ -2364,7 +2368,7 @@ redo:
2364 } 2368 }
2365 2369
2366 if (unlikely(gfpflags & __GFP_ZERO) && object) 2370 if (unlikely(gfpflags & __GFP_ZERO) && object)
2367 memset(object, 0, s->objsize); 2371 memset(object, 0, s->object_size);
2368 2372
2369 slab_post_alloc_hook(s, gfpflags, object); 2373 slab_post_alloc_hook(s, gfpflags, object);
2370 2374
@@ -2375,7 +2379,7 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2375{ 2379{
2376 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); 2380 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2377 2381
2378 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); 2382 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags);
2379 2383
2380 return ret; 2384 return ret;
2381} 2385}
@@ -2405,7 +2409,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2405 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); 2409 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2406 2410
2407 trace_kmem_cache_alloc_node(_RET_IP_, ret, 2411 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2408 s->objsize, s->size, gfpflags, node); 2412 s->object_size, s->size, gfpflags, node);
2409 2413
2410 return ret; 2414 return ret;
2411} 2415}
@@ -2900,7 +2904,7 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min)
2900static int calculate_sizes(struct kmem_cache *s, int forced_order) 2904static int calculate_sizes(struct kmem_cache *s, int forced_order)
2901{ 2905{
2902 unsigned long flags = s->flags; 2906 unsigned long flags = s->flags;
2903 unsigned long size = s->objsize; 2907 unsigned long size = s->object_size;
2904 unsigned long align = s->align; 2908 unsigned long align = s->align;
2905 int order; 2909 int order;
2906 2910
@@ -2929,7 +2933,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
2929 * end of the object and the free pointer. If not then add an 2933 * end of the object and the free pointer. If not then add an
2930 * additional word to have some bytes to store Redzone information. 2934 * additional word to have some bytes to store Redzone information.
2931 */ 2935 */
2932 if ((flags & SLAB_RED_ZONE) && size == s->objsize) 2936 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
2933 size += sizeof(void *); 2937 size += sizeof(void *);
2934#endif 2938#endif
2935 2939
@@ -2977,7 +2981,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
2977 * user specified and the dynamic determination of cache line size 2981 * user specified and the dynamic determination of cache line size
2978 * on bootup. 2982 * on bootup.
2979 */ 2983 */
2980 align = calculate_alignment(flags, align, s->objsize); 2984 align = calculate_alignment(flags, align, s->object_size);
2981 s->align = align; 2985 s->align = align;
2982 2986
2983 /* 2987 /*
@@ -3025,7 +3029,7 @@ static int kmem_cache_open(struct kmem_cache *s,
3025 memset(s, 0, kmem_size); 3029 memset(s, 0, kmem_size);
3026 s->name = name; 3030 s->name = name;
3027 s->ctor = ctor; 3031 s->ctor = ctor;
3028 s->objsize = size; 3032 s->object_size = size;
3029 s->align = align; 3033 s->align = align;
3030 s->flags = kmem_cache_flags(size, flags, name, ctor); 3034 s->flags = kmem_cache_flags(size, flags, name, ctor);
3031 s->reserved = 0; 3035 s->reserved = 0;
@@ -3040,7 +3044,7 @@ static int kmem_cache_open(struct kmem_cache *s,
3040 * Disable debugging flags that store metadata if the min slab 3044 * Disable debugging flags that store metadata if the min slab
3041 * order increased. 3045 * order increased.
3042 */ 3046 */
3043 if (get_order(s->size) > get_order(s->objsize)) { 3047 if (get_order(s->size) > get_order(s->object_size)) {
3044 s->flags &= ~DEBUG_METADATA_FLAGS; 3048 s->flags &= ~DEBUG_METADATA_FLAGS;
3045 s->offset = 0; 3049 s->offset = 0;
3046 if (!calculate_sizes(s, -1)) 3050 if (!calculate_sizes(s, -1))
@@ -3114,7 +3118,7 @@ error:
3114 */ 3118 */
3115unsigned int kmem_cache_size(struct kmem_cache *s) 3119unsigned int kmem_cache_size(struct kmem_cache *s)
3116{ 3120{
3117 return s->objsize; 3121 return s->object_size;
3118} 3122}
3119EXPORT_SYMBOL(kmem_cache_size); 3123EXPORT_SYMBOL(kmem_cache_size);
3120 3124
@@ -3192,11 +3196,11 @@ static inline int kmem_cache_close(struct kmem_cache *s)
3192 */ 3196 */
3193void kmem_cache_destroy(struct kmem_cache *s) 3197void kmem_cache_destroy(struct kmem_cache *s)
3194{ 3198{
3195 down_write(&slub_lock); 3199 mutex_lock(&slab_mutex);
3196 s->refcount--; 3200 s->refcount--;
3197 if (!s->refcount) { 3201 if (!s->refcount) {
3198 list_del(&s->list); 3202 list_del(&s->list);
3199 up_write(&slub_lock); 3203 mutex_unlock(&slab_mutex);
3200 if (kmem_cache_close(s)) { 3204 if (kmem_cache_close(s)) {
3201 printk(KERN_ERR "SLUB %s: %s called for cache that " 3205 printk(KERN_ERR "SLUB %s: %s called for cache that "
3202 "still has objects.\n", s->name, __func__); 3206 "still has objects.\n", s->name, __func__);
@@ -3206,7 +3210,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
3206 rcu_barrier(); 3210 rcu_barrier();
3207 sysfs_slab_remove(s); 3211 sysfs_slab_remove(s);
3208 } else 3212 } else
3209 up_write(&slub_lock); 3213 mutex_unlock(&slab_mutex);
3210} 3214}
3211EXPORT_SYMBOL(kmem_cache_destroy); 3215EXPORT_SYMBOL(kmem_cache_destroy);
3212 3216
@@ -3268,7 +3272,7 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name,
3268 3272
3269 /* 3273 /*
3270 * This function is called with IRQs disabled during early-boot on 3274 * This function is called with IRQs disabled during early-boot on
3271 * single CPU so there's no need to take slub_lock here. 3275 * single CPU so there's no need to take slab_mutex here.
3272 */ 3276 */
3273 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, 3277 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
3274 flags, NULL)) 3278 flags, NULL))
@@ -3553,10 +3557,10 @@ static int slab_mem_going_offline_callback(void *arg)
3553{ 3557{
3554 struct kmem_cache *s; 3558 struct kmem_cache *s;
3555 3559
3556 down_read(&slub_lock); 3560 mutex_lock(&slab_mutex);
3557 list_for_each_entry(s, &slab_caches, list) 3561 list_for_each_entry(s, &slab_caches, list)
3558 kmem_cache_shrink(s); 3562 kmem_cache_shrink(s);
3559 up_read(&slub_lock); 3563 mutex_unlock(&slab_mutex);
3560 3564
3561 return 0; 3565 return 0;
3562} 3566}
@@ -3577,7 +3581,7 @@ static void slab_mem_offline_callback(void *arg)
3577 if (offline_node < 0) 3581 if (offline_node < 0)
3578 return; 3582 return;
3579 3583
3580 down_read(&slub_lock); 3584 mutex_lock(&slab_mutex);
3581 list_for_each_entry(s, &slab_caches, list) { 3585 list_for_each_entry(s, &slab_caches, list) {
3582 n = get_node(s, offline_node); 3586 n = get_node(s, offline_node);
3583 if (n) { 3587 if (n) {
@@ -3593,7 +3597,7 @@ static void slab_mem_offline_callback(void *arg)
3593 kmem_cache_free(kmem_cache_node, n); 3597 kmem_cache_free(kmem_cache_node, n);
3594 } 3598 }
3595 } 3599 }
3596 up_read(&slub_lock); 3600 mutex_unlock(&slab_mutex);
3597} 3601}
3598 3602
3599static int slab_mem_going_online_callback(void *arg) 3603static int slab_mem_going_online_callback(void *arg)
@@ -3616,7 +3620,7 @@ static int slab_mem_going_online_callback(void *arg)
3616 * allocate a kmem_cache_node structure in order to bring the node 3620 * allocate a kmem_cache_node structure in order to bring the node
3617 * online. 3621 * online.
3618 */ 3622 */
3619 down_read(&slub_lock); 3623 mutex_lock(&slab_mutex);
3620 list_for_each_entry(s, &slab_caches, list) { 3624 list_for_each_entry(s, &slab_caches, list) {
3621 /* 3625 /*
3622 * XXX: kmem_cache_alloc_node will fallback to other nodes 3626 * XXX: kmem_cache_alloc_node will fallback to other nodes
@@ -3632,7 +3636,7 @@ static int slab_mem_going_online_callback(void *arg)
3632 s->node[nid] = n; 3636 s->node[nid] = n;
3633 } 3637 }
3634out: 3638out:
3635 up_read(&slub_lock); 3639 mutex_unlock(&slab_mutex);
3636 return ret; 3640 return ret;
3637} 3641}
3638 3642
@@ -3843,11 +3847,11 @@ void __init kmem_cache_init(void)
3843 3847
3844 if (s && s->size) { 3848 if (s && s->size) {
3845 char *name = kasprintf(GFP_NOWAIT, 3849 char *name = kasprintf(GFP_NOWAIT,
3846 "dma-kmalloc-%d", s->objsize); 3850 "dma-kmalloc-%d", s->object_size);
3847 3851
3848 BUG_ON(!name); 3852 BUG_ON(!name);
3849 kmalloc_dma_caches[i] = create_kmalloc_cache(name, 3853 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3850 s->objsize, SLAB_CACHE_DMA); 3854 s->object_size, SLAB_CACHE_DMA);
3851 } 3855 }
3852 } 3856 }
3853#endif 3857#endif
@@ -3924,16 +3928,12 @@ static struct kmem_cache *find_mergeable(size_t size,
3924 return NULL; 3928 return NULL;
3925} 3929}
3926 3930
3927struct kmem_cache *kmem_cache_create(const char *name, size_t size, 3931struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
3928 size_t align, unsigned long flags, void (*ctor)(void *)) 3932 size_t align, unsigned long flags, void (*ctor)(void *))
3929{ 3933{
3930 struct kmem_cache *s; 3934 struct kmem_cache *s;
3931 char *n; 3935 char *n;
3932 3936
3933 if (WARN_ON(!name))
3934 return NULL;
3935
3936 down_write(&slub_lock);
3937 s = find_mergeable(size, align, flags, name, ctor); 3937 s = find_mergeable(size, align, flags, name, ctor);
3938 if (s) { 3938 if (s) {
3939 s->refcount++; 3939 s->refcount++;
@@ -3941,49 +3941,42 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3941 * Adjust the object sizes so that we clear 3941 * Adjust the object sizes so that we clear
3942 * the complete object on kzalloc. 3942 * the complete object on kzalloc.
3943 */ 3943 */
3944 s->objsize = max(s->objsize, (int)size); 3944 s->object_size = max(s->object_size, (int)size);
3945 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3945 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3946 3946
3947 if (sysfs_slab_alias(s, name)) { 3947 if (sysfs_slab_alias(s, name)) {
3948 s->refcount--; 3948 s->refcount--;
3949 goto err; 3949 return NULL;
3950 } 3950 }
3951 up_write(&slub_lock);
3952 return s; 3951 return s;
3953 } 3952 }
3954 3953
3955 n = kstrdup(name, GFP_KERNEL); 3954 n = kstrdup(name, GFP_KERNEL);
3956 if (!n) 3955 if (!n)
3957 goto err; 3956 return NULL;
3958 3957
3959 s = kmalloc(kmem_size, GFP_KERNEL); 3958 s = kmalloc(kmem_size, GFP_KERNEL);
3960 if (s) { 3959 if (s) {
3961 if (kmem_cache_open(s, n, 3960 if (kmem_cache_open(s, n,
3962 size, align, flags, ctor)) { 3961 size, align, flags, ctor)) {
3962 int r;
3963
3963 list_add(&s->list, &slab_caches); 3964 list_add(&s->list, &slab_caches);
3964 up_write(&slub_lock); 3965 mutex_unlock(&slab_mutex);
3965 if (sysfs_slab_add(s)) { 3966 r = sysfs_slab_add(s);
3966 down_write(&slub_lock); 3967 mutex_lock(&slab_mutex);
3967 list_del(&s->list); 3968
3968 kfree(n); 3969 if (!r)
3969 kfree(s); 3970 return s;
3970 goto err; 3971
3971 } 3972 list_del(&s->list);
3972 return s; 3973 kmem_cache_close(s);
3973 } 3974 }
3974 kfree(s); 3975 kfree(s);
3975 } 3976 }
3976 kfree(n); 3977 kfree(n);
3977err: 3978 return NULL;
3978 up_write(&slub_lock);
3979
3980 if (flags & SLAB_PANIC)
3981 panic("Cannot create slabcache %s\n", name);
3982 else
3983 s = NULL;
3984 return s;
3985} 3979}
3986EXPORT_SYMBOL(kmem_cache_create);
3987 3980
3988#ifdef CONFIG_SMP 3981#ifdef CONFIG_SMP
3989/* 3982/*
@@ -4002,13 +3995,13 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
4002 case CPU_UP_CANCELED_FROZEN: 3995 case CPU_UP_CANCELED_FROZEN:
4003 case CPU_DEAD: 3996 case CPU_DEAD:
4004 case CPU_DEAD_FROZEN: 3997 case CPU_DEAD_FROZEN:
4005 down_read(&slub_lock); 3998 mutex_lock(&slab_mutex);
4006 list_for_each_entry(s, &slab_caches, list) { 3999 list_for_each_entry(s, &slab_caches, list) {
4007 local_irq_save(flags); 4000 local_irq_save(flags);
4008 __flush_cpu_slab(s, cpu); 4001 __flush_cpu_slab(s, cpu);
4009 local_irq_restore(flags); 4002 local_irq_restore(flags);
4010 } 4003 }
4011 up_read(&slub_lock); 4004 mutex_unlock(&slab_mutex);
4012 break; 4005 break;
4013 default: 4006 default:
4014 break; 4007 break;
@@ -4500,30 +4493,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
4500 4493
4501 for_each_possible_cpu(cpu) { 4494 for_each_possible_cpu(cpu) {
4502 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); 4495 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4503 int node = ACCESS_ONCE(c->node); 4496 int node;
4504 struct page *page; 4497 struct page *page;
4505 4498
4506 if (node < 0)
4507 continue;
4508 page = ACCESS_ONCE(c->page); 4499 page = ACCESS_ONCE(c->page);
4509 if (page) { 4500 if (!page)
4510 if (flags & SO_TOTAL) 4501 continue;
4511 x = page->objects;
4512 else if (flags & SO_OBJECTS)
4513 x = page->inuse;
4514 else
4515 x = 1;
4516 4502
4517 total += x; 4503 node = page_to_nid(page);
4518 nodes[node] += x; 4504 if (flags & SO_TOTAL)
4519 } 4505 x = page->objects;
4520 page = c->partial; 4506 else if (flags & SO_OBJECTS)
4507 x = page->inuse;
4508 else
4509 x = 1;
4521 4510
4511 total += x;
4512 nodes[node] += x;
4513
4514 page = ACCESS_ONCE(c->partial);
4522 if (page) { 4515 if (page) {
4523 x = page->pobjects; 4516 x = page->pobjects;
4524 total += x; 4517 total += x;
4525 nodes[node] += x; 4518 nodes[node] += x;
4526 } 4519 }
4520
4527 per_cpu[node]++; 4521 per_cpu[node]++;
4528 } 4522 }
4529 } 4523 }
@@ -4623,7 +4617,7 @@ SLAB_ATTR_RO(align);
4623 4617
4624static ssize_t object_size_show(struct kmem_cache *s, char *buf) 4618static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4625{ 4619{
4626 return sprintf(buf, "%d\n", s->objsize); 4620 return sprintf(buf, "%d\n", s->object_size);
4627} 4621}
4628SLAB_ATTR_RO(object_size); 4622SLAB_ATTR_RO(object_size);
4629 4623
@@ -5286,7 +5280,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
5286 const char *name; 5280 const char *name;
5287 int unmergeable; 5281 int unmergeable;
5288 5282
5289 if (slab_state < SYSFS) 5283 if (slab_state < FULL)
5290 /* Defer until later */ 5284 /* Defer until later */
5291 return 0; 5285 return 0;
5292 5286
@@ -5331,7 +5325,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
5331 5325
5332static void sysfs_slab_remove(struct kmem_cache *s) 5326static void sysfs_slab_remove(struct kmem_cache *s)
5333{ 5327{
5334 if (slab_state < SYSFS) 5328 if (slab_state < FULL)
5335 /* 5329 /*
5336 * Sysfs has not been setup yet so no need to remove the 5330 * Sysfs has not been setup yet so no need to remove the
5337 * cache from sysfs. 5331 * cache from sysfs.
@@ -5359,7 +5353,7 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5359{ 5353{
5360 struct saved_alias *al; 5354 struct saved_alias *al;
5361 5355
5362 if (slab_state == SYSFS) { 5356 if (slab_state == FULL) {
5363 /* 5357 /*
5364 * If we have a leftover link then remove it. 5358 * If we have a leftover link then remove it.
5365 */ 5359 */
@@ -5383,16 +5377,16 @@ static int __init slab_sysfs_init(void)
5383 struct kmem_cache *s; 5377 struct kmem_cache *s;
5384 int err; 5378 int err;
5385 5379
5386 down_write(&slub_lock); 5380 mutex_lock(&slab_mutex);
5387 5381
5388 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); 5382 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5389 if (!slab_kset) { 5383 if (!slab_kset) {
5390 up_write(&slub_lock); 5384 mutex_unlock(&slab_mutex);
5391 printk(KERN_ERR "Cannot register slab subsystem.\n"); 5385 printk(KERN_ERR "Cannot register slab subsystem.\n");
5392 return -ENOSYS; 5386 return -ENOSYS;
5393 } 5387 }
5394 5388
5395 slab_state = SYSFS; 5389 slab_state = FULL;
5396 5390
5397 list_for_each_entry(s, &slab_caches, list) { 5391 list_for_each_entry(s, &slab_caches, list) {
5398 err = sysfs_slab_add(s); 5392 err = sysfs_slab_add(s);
@@ -5408,11 +5402,11 @@ static int __init slab_sysfs_init(void)
5408 err = sysfs_slab_alias(al->s, al->name); 5402 err = sysfs_slab_alias(al->s, al->name);
5409 if (err) 5403 if (err)
5410 printk(KERN_ERR "SLUB: Unable to add boot slab alias" 5404 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
5411 " %s to sysfs\n", s->name); 5405 " %s to sysfs\n", al->name);
5412 kfree(al); 5406 kfree(al);
5413 } 5407 }
5414 5408
5415 up_write(&slub_lock); 5409 mutex_unlock(&slab_mutex);
5416 resiliency_test(); 5410 resiliency_test();
5417 return 0; 5411 return 0;
5418} 5412}
@@ -5427,7 +5421,7 @@ __initcall(slab_sysfs_init);
5427static void print_slabinfo_header(struct seq_file *m) 5421static void print_slabinfo_header(struct seq_file *m)
5428{ 5422{
5429 seq_puts(m, "slabinfo - version: 2.1\n"); 5423 seq_puts(m, "slabinfo - version: 2.1\n");
5430 seq_puts(m, "# name <active_objs> <num_objs> <objsize> " 5424 seq_puts(m, "# name <active_objs> <num_objs> <object_size> "
5431 "<objperslab> <pagesperslab>"); 5425 "<objperslab> <pagesperslab>");
5432 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); 5426 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
5433 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); 5427 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
@@ -5438,7 +5432,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
5438{ 5432{
5439 loff_t n = *pos; 5433 loff_t n = *pos;
5440 5434
5441 down_read(&slub_lock); 5435 mutex_lock(&slab_mutex);
5442 if (!n) 5436 if (!n)
5443 print_slabinfo_header(m); 5437 print_slabinfo_header(m);
5444 5438
@@ -5452,7 +5446,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos)
5452 5446
5453static void s_stop(struct seq_file *m, void *p) 5447static void s_stop(struct seq_file *m, void *p)
5454{ 5448{
5455 up_read(&slub_lock); 5449 mutex_unlock(&slab_mutex);
5456} 5450}
5457 5451
5458static int s_show(struct seq_file *m, void *p) 5452static int s_show(struct seq_file *m, void *p)