Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 464 |
1 file changed, 229 insertions, 235 deletions
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/bitops.h> | 17 | #include <linux/bitops.h> |
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include "slab.h" | ||
19 | #include <linux/proc_fs.h> | 20 | #include <linux/proc_fs.h> |
20 | #include <linux/seq_file.h> | 21 | #include <linux/seq_file.h> |
21 | #include <linux/kmemcheck.h> | 22 | #include <linux/kmemcheck.h> |
@@ -33,15 +34,17 @@ | |||
33 | 34 | ||
34 | #include <trace/events/kmem.h> | 35 | #include <trace/events/kmem.h> |
35 | 36 | ||
37 | #include "internal.h" | ||
38 | |||
36 | /* | 39 | /* |
37 | * Lock order: | 40 | * Lock order: |
38 | * 1. slub_lock (Global Semaphore) | 41 | * 1. slab_mutex (Global Mutex) |
39 | * 2. node->list_lock | 42 | * 2. node->list_lock |
40 | * 3. slab_lock(page) (Only on some arches and for debugging) | 43 | * 3. slab_lock(page) (Only on some arches and for debugging) |
41 | * | 44 | * |
42 | * slub_lock | 45 | * slab_mutex |
43 | * | 46 | * |
44 | * The role of the slub_lock is to protect the list of all the slabs | 47 | * The role of the slab_mutex is to protect the list of all the slabs |
45 | * and to synchronize major metadata changes to slab cache structures. | 48 | * and to synchronize major metadata changes to slab cache structures. |
46 | * | 49 | * |
47 | * The slab_lock is only used for debugging and on arches that do not | 50 | * The slab_lock is only used for debugging and on arches that do not |
@@ -182,17 +185,6 @@ static int kmem_size = sizeof(struct kmem_cache); | |||
182 | static struct notifier_block slab_notifier; | 185 | static struct notifier_block slab_notifier; |
183 | #endif | 186 | #endif |
184 | 187 | ||
185 | static enum { | ||
186 | DOWN, /* No slab functionality available */ | ||
187 | PARTIAL, /* Kmem_cache_node works */ | ||
188 | UP, /* Everything works but does not show up in sysfs */ | ||
189 | SYSFS /* Sysfs up */ | ||
190 | } slab_state = DOWN; | ||
191 | |||
192 | /* A list of all slab caches on the system */ | ||
193 | static DECLARE_RWSEM(slub_lock); | ||
194 | static LIST_HEAD(slab_caches); | ||
195 | |||
196 | /* | 188 | /* |
197 | * Tracking user of a slab. | 189 | * Tracking user of a slab. |
198 | */ | 190 | */ |
@@ -237,11 +229,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) | |||
237 | * Core slab cache functions | 229 | * Core slab cache functions |
238 | *******************************************************************/ | 230 | *******************************************************************/ |
239 | 231 | ||
240 | int slab_is_available(void) | ||
241 | { | ||
242 | return slab_state >= UP; | ||
243 | } | ||
244 | |||
245 | static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) | 232 | static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) |
246 | { | 233 | { |
247 | return s->node[node]; | 234 | return s->node[node]; |
@@ -311,7 +298,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s) | |||
311 | * and whatever may come after it. | 298 | * and whatever may come after it. |
312 | */ | 299 | */ |
313 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | 300 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) |
314 | return s->objsize; | 301 | return s->object_size; |
315 | 302 | ||
316 | #endif | 303 | #endif |
317 | /* | 304 | /* |
@@ -609,11 +596,11 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | |||
609 | if (p > addr + 16) | 596 | if (p > addr + 16) |
610 | print_section("Bytes b4 ", p - 16, 16); | 597 | print_section("Bytes b4 ", p - 16, 16); |
611 | 598 | ||
612 | print_section("Object ", p, min_t(unsigned long, s->objsize, | 599 | print_section("Object ", p, min_t(unsigned long, s->object_size, |
613 | PAGE_SIZE)); | 600 | PAGE_SIZE)); |
614 | if (s->flags & SLAB_RED_ZONE) | 601 | if (s->flags & SLAB_RED_ZONE) |
615 | print_section("Redzone ", p + s->objsize, | 602 | print_section("Redzone ", p + s->object_size, |
616 | s->inuse - s->objsize); | 603 | s->inuse - s->object_size); |
617 | 604 | ||
618 | if (s->offset) | 605 | if (s->offset) |
619 | off = s->offset + sizeof(void *); | 606 | off = s->offset + sizeof(void *); |
@@ -655,12 +642,12 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) | |||
655 | u8 *p = object; | 642 | u8 *p = object; |
656 | 643 | ||
657 | if (s->flags & __OBJECT_POISON) { | 644 | if (s->flags & __OBJECT_POISON) { |
658 | memset(p, POISON_FREE, s->objsize - 1); | 645 | memset(p, POISON_FREE, s->object_size - 1); |
659 | p[s->objsize - 1] = POISON_END; | 646 | p[s->object_size - 1] = POISON_END; |
660 | } | 647 | } |
661 | 648 | ||
662 | if (s->flags & SLAB_RED_ZONE) | 649 | if (s->flags & SLAB_RED_ZONE) |
663 | memset(p + s->objsize, val, s->inuse - s->objsize); | 650 | memset(p + s->object_size, val, s->inuse - s->object_size); |
664 | } | 651 | } |
665 | 652 | ||
666 | static void restore_bytes(struct kmem_cache *s, char *message, u8 data, | 653 | static void restore_bytes(struct kmem_cache *s, char *message, u8 data, |
@@ -705,10 +692,10 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, | |||
705 | * Poisoning uses 0x6b (POISON_FREE) and the last byte is | 692 | * Poisoning uses 0x6b (POISON_FREE) and the last byte is |
706 | * 0xa5 (POISON_END) | 693 | * 0xa5 (POISON_END) |
707 | * | 694 | * |
708 | * object + s->objsize | 695 | * object + s->object_size |
709 | * Padding to reach word boundary. This is also used for Redzoning. | 696 | * Padding to reach word boundary. This is also used for Redzoning. |
710 | * Padding is extended by another word if Redzoning is enabled and | 697 | * Padding is extended by another word if Redzoning is enabled and |
711 | * objsize == inuse. | 698 | * object_size == inuse. |
712 | * | 699 | * |
713 | * We fill with 0xbb (RED_INACTIVE) for inactive objects and with | 700 | * We fill with 0xbb (RED_INACTIVE) for inactive objects and with |
714 | * 0xcc (RED_ACTIVE) for objects in use. | 701 | * 0xcc (RED_ACTIVE) for objects in use. |
@@ -727,7 +714,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, | |||
727 | * object + s->size | 714 | * object + s->size |
728 | * Nothing is used beyond s->size. | 715 | * Nothing is used beyond s->size. |
729 | * | 716 | * |
730 | * If slabcaches are merged then the objsize and inuse boundaries are mostly | 717 | * If slabcaches are merged then the object_size and inuse boundaries are mostly |
731 | * ignored. And therefore no slab options that rely on these boundaries | 718 | * ignored. And therefore no slab options that rely on these boundaries |
732 | * may be used with merged slabcaches. | 719 | * may be used with merged slabcaches. |
733 | */ | 720 | */ |
@@ -787,25 +774,25 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
787 | void *object, u8 val) | 774 | void *object, u8 val) |
788 | { | 775 | { |
789 | u8 *p = object; | 776 | u8 *p = object; |
790 | u8 *endobject = object + s->objsize; | 777 | u8 *endobject = object + s->object_size; |
791 | 778 | ||
792 | if (s->flags & SLAB_RED_ZONE) { | 779 | if (s->flags & SLAB_RED_ZONE) { |
793 | if (!check_bytes_and_report(s, page, object, "Redzone", | 780 | if (!check_bytes_and_report(s, page, object, "Redzone", |
794 | endobject, val, s->inuse - s->objsize)) | 781 | endobject, val, s->inuse - s->object_size)) |
795 | return 0; | 782 | return 0; |
796 | } else { | 783 | } else { |
797 | if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { | 784 | if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) { |
798 | check_bytes_and_report(s, page, p, "Alignment padding", | 785 | check_bytes_and_report(s, page, p, "Alignment padding", |
799 | endobject, POISON_INUSE, s->inuse - s->objsize); | 786 | endobject, POISON_INUSE, s->inuse - s->object_size); |
800 | } | 787 | } |
801 | } | 788 | } |
802 | 789 | ||
803 | if (s->flags & SLAB_POISON) { | 790 | if (s->flags & SLAB_POISON) { |
804 | if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && | 791 | if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && |
805 | (!check_bytes_and_report(s, page, p, "Poison", p, | 792 | (!check_bytes_and_report(s, page, p, "Poison", p, |
806 | POISON_FREE, s->objsize - 1) || | 793 | POISON_FREE, s->object_size - 1) || |
807 | !check_bytes_and_report(s, page, p, "Poison", | 794 | !check_bytes_and_report(s, page, p, "Poison", |
808 | p + s->objsize - 1, POISON_END, 1))) | 795 | p + s->object_size - 1, POISON_END, 1))) |
809 | return 0; | 796 | return 0; |
810 | /* | 797 | /* |
811 | * check_pad_bytes cleans up on its own. | 798 | * check_pad_bytes cleans up on its own. |
@@ -926,7 +913,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, | |||
926 | page->freelist); | 913 | page->freelist); |
927 | 914 | ||
928 | if (!alloc) | 915 | if (!alloc) |
929 | print_section("Object ", (void *)object, s->objsize); | 916 | print_section("Object ", (void *)object, s->object_size); |
930 | 917 | ||
931 | dump_stack(); | 918 | dump_stack(); |
932 | } | 919 | } |
@@ -942,14 +929,14 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) | |||
942 | lockdep_trace_alloc(flags); | 929 | lockdep_trace_alloc(flags); |
943 | might_sleep_if(flags & __GFP_WAIT); | 930 | might_sleep_if(flags & __GFP_WAIT); |
944 | 931 | ||
945 | return should_failslab(s->objsize, flags, s->flags); | 932 | return should_failslab(s->object_size, flags, s->flags); |
946 | } | 933 | } |
947 | 934 | ||
948 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) | 935 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) |
949 | { | 936 | { |
950 | flags &= gfp_allowed_mask; | 937 | flags &= gfp_allowed_mask; |
951 | kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); | 938 | kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); |
952 | kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); | 939 | kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); |
953 | } | 940 | } |
954 | 941 | ||
955 | static inline void slab_free_hook(struct kmem_cache *s, void *x) | 942 | static inline void slab_free_hook(struct kmem_cache *s, void *x) |
@@ -966,13 +953,13 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) | |||
966 | unsigned long flags; | 953 | unsigned long flags; |
967 | 954 | ||
968 | local_irq_save(flags); | 955 | local_irq_save(flags); |
969 | kmemcheck_slab_free(s, x, s->objsize); | 956 | kmemcheck_slab_free(s, x, s->object_size); |
970 | debug_check_no_locks_freed(x, s->objsize); | 957 | debug_check_no_locks_freed(x, s->object_size); |
971 | local_irq_restore(flags); | 958 | local_irq_restore(flags); |
972 | } | 959 | } |
973 | #endif | 960 | #endif |
974 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) | 961 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) |
975 | debug_check_no_obj_freed(x, s->objsize); | 962 | debug_check_no_obj_freed(x, s->object_size); |
976 | } | 963 | } |
977 | 964 | ||
978 | /* | 965 | /* |
@@ -1207,7 +1194,7 @@ out: | |||
1207 | 1194 | ||
1208 | __setup("slub_debug", setup_slub_debug); | 1195 | __setup("slub_debug", setup_slub_debug); |
1209 | 1196 | ||
1210 | static unsigned long kmem_cache_flags(unsigned long objsize, | 1197 | static unsigned long kmem_cache_flags(unsigned long object_size, |
1211 | unsigned long flags, const char *name, | 1198 | unsigned long flags, const char *name, |
1212 | void (*ctor)(void *)) | 1199 | void (*ctor)(void *)) |
1213 | { | 1200 | { |
@@ -1237,7 +1224,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page, | |||
1237 | static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, | 1224 | static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, |
1238 | struct page *page) {} | 1225 | struct page *page) {} |
1239 | static inline void remove_full(struct kmem_cache *s, struct page *page) {} | 1226 | static inline void remove_full(struct kmem_cache *s, struct page *page) {} |
1240 | static inline unsigned long kmem_cache_flags(unsigned long objsize, | 1227 | static inline unsigned long kmem_cache_flags(unsigned long object_size, |
1241 | unsigned long flags, const char *name, | 1228 | unsigned long flags, const char *name, |
1242 | void (*ctor)(void *)) | 1229 | void (*ctor)(void *)) |
1243 | { | 1230 | { |
@@ -1314,13 +1301,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1314 | stat(s, ORDER_FALLBACK); | 1301 | stat(s, ORDER_FALLBACK); |
1315 | } | 1302 | } |
1316 | 1303 | ||
1317 | if (flags & __GFP_WAIT) | 1304 | if (kmemcheck_enabled && page |
1318 | local_irq_disable(); | ||
1319 | |||
1320 | if (!page) | ||
1321 | return NULL; | ||
1322 | |||
1323 | if (kmemcheck_enabled | ||
1324 | && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { | 1305 | && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { |
1325 | int pages = 1 << oo_order(oo); | 1306 | int pages = 1 << oo_order(oo); |
1326 | 1307 | ||
@@ -1336,6 +1317,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1336 | kmemcheck_mark_unallocated_pages(page, pages); | 1317 | kmemcheck_mark_unallocated_pages(page, pages); |
1337 | } | 1318 | } |
1338 | 1319 | ||
1320 | if (flags & __GFP_WAIT) | ||
1321 | local_irq_disable(); | ||
1322 | if (!page) | ||
1323 | return NULL; | ||
1324 | |||
1339 | page->objects = oo_objects(oo); | 1325 | page->objects = oo_objects(oo); |
1340 | mod_zone_page_state(page_zone(page), | 1326 | mod_zone_page_state(page_zone(page), |
1341 | (s->flags & SLAB_RECLAIM_ACCOUNT) ? | 1327 | (s->flags & SLAB_RECLAIM_ACCOUNT) ? |
@@ -1370,6 +1356,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1370 | inc_slabs_node(s, page_to_nid(page), page->objects); | 1356 | inc_slabs_node(s, page_to_nid(page), page->objects); |
1371 | page->slab = s; | 1357 | page->slab = s; |
1372 | __SetPageSlab(page); | 1358 | __SetPageSlab(page); |
1359 | if (page->pfmemalloc) | ||
1360 | SetPageSlabPfmemalloc(page); | ||
1373 | 1361 | ||
1374 | start = page_address(page); | 1362 | start = page_address(page); |
1375 | 1363 | ||
@@ -1413,6 +1401,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1413 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, | 1401 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
1414 | -pages); | 1402 | -pages); |
1415 | 1403 | ||
1404 | __ClearPageSlabPfmemalloc(page); | ||
1416 | __ClearPageSlab(page); | 1405 | __ClearPageSlab(page); |
1417 | reset_page_mapcount(page); | 1406 | reset_page_mapcount(page); |
1418 | if (current->reclaim_state) | 1407 | if (current->reclaim_state) |
@@ -1490,12 +1479,12 @@ static inline void remove_partial(struct kmem_cache_node *n, | |||
1490 | } | 1479 | } |
1491 | 1480 | ||
1492 | /* | 1481 | /* |
1493 | * Lock slab, remove from the partial list and put the object into the | 1482 | * Remove slab from the partial list, freeze it and |
1494 | * per cpu freelist. | 1483 | * return the pointer to the freelist. |
1495 | * | 1484 | * |
1496 | * Returns a list of objects or NULL if it fails. | 1485 | * Returns a list of objects or NULL if it fails. |
1497 | * | 1486 | * |
1498 | * Must hold list_lock. | 1487 | * Must hold list_lock since we modify the partial list. |
1499 | */ | 1488 | */ |
1500 | static inline void *acquire_slab(struct kmem_cache *s, | 1489 | static inline void *acquire_slab(struct kmem_cache *s, |
1501 | struct kmem_cache_node *n, struct page *page, | 1490 | struct kmem_cache_node *n, struct page *page, |
@@ -1510,26 +1499,27 @@ static inline void *acquire_slab(struct kmem_cache *s, | |||
1510 | * The old freelist is the list of objects for the | 1499 | * The old freelist is the list of objects for the |
1511 | * per cpu allocation list. | 1500 | * per cpu allocation list. |
1512 | */ | 1501 | */ |
1513 | do { | 1502 | freelist = page->freelist; |
1514 | freelist = page->freelist; | 1503 | counters = page->counters; |
1515 | counters = page->counters; | 1504 | new.counters = counters; |
1516 | new.counters = counters; | 1505 | if (mode) { |
1517 | if (mode) { | 1506 | new.inuse = page->objects; |
1518 | new.inuse = page->objects; | 1507 | new.freelist = NULL; |
1519 | new.freelist = NULL; | 1508 | } else { |
1520 | } else { | 1509 | new.freelist = freelist; |
1521 | new.freelist = freelist; | 1510 | } |
1522 | } | ||
1523 | 1511 | ||
1524 | VM_BUG_ON(new.frozen); | 1512 | VM_BUG_ON(new.frozen); |
1525 | new.frozen = 1; | 1513 | new.frozen = 1; |
1526 | 1514 | ||
1527 | } while (!__cmpxchg_double_slab(s, page, | 1515 | if (!__cmpxchg_double_slab(s, page, |
1528 | freelist, counters, | 1516 | freelist, counters, |
1529 | new.freelist, new.counters, | 1517 | new.freelist, new.counters, |
1530 | "lock and freeze")); | 1518 | "acquire_slab")) |
1519 | return NULL; | ||
1531 | 1520 | ||
1532 | remove_partial(n, page); | 1521 | remove_partial(n, page); |
1522 | WARN_ON(!freelist); | ||
1533 | return freelist; | 1523 | return freelist; |
1534 | } | 1524 | } |
1535 | 1525 | ||
@@ -1563,7 +1553,6 @@ static void *get_partial_node(struct kmem_cache *s, | |||
1563 | 1553 | ||
1564 | if (!object) { | 1554 | if (!object) { |
1565 | c->page = page; | 1555 | c->page = page; |
1566 | c->node = page_to_nid(page); | ||
1567 | stat(s, ALLOC_FROM_PARTIAL); | 1556 | stat(s, ALLOC_FROM_PARTIAL); |
1568 | object = t; | 1557 | object = t; |
1569 | available = page->objects - page->inuse; | 1558 | available = page->objects - page->inuse; |
@@ -1617,7 +1606,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, | |||
1617 | 1606 | ||
1618 | do { | 1607 | do { |
1619 | cpuset_mems_cookie = get_mems_allowed(); | 1608 | cpuset_mems_cookie = get_mems_allowed(); |
1620 | zonelist = node_zonelist(slab_node(current->mempolicy), flags); | 1609 | zonelist = node_zonelist(slab_node(), flags); |
1621 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { | 1610 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { |
1622 | struct kmem_cache_node *n; | 1611 | struct kmem_cache_node *n; |
1623 | 1612 | ||
@@ -1731,14 +1720,12 @@ void init_kmem_cache_cpus(struct kmem_cache *s) | |||
1731 | /* | 1720 | /* |
1732 | * Remove the cpu slab | 1721 | * Remove the cpu slab |
1733 | */ | 1722 | */ |
1734 | static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | 1723 | static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist) |
1735 | { | 1724 | { |
1736 | enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; | 1725 | enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; |
1737 | struct page *page = c->page; | ||
1738 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | 1726 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); |
1739 | int lock = 0; | 1727 | int lock = 0; |
1740 | enum slab_modes l = M_NONE, m = M_NONE; | 1728 | enum slab_modes l = M_NONE, m = M_NONE; |
1741 | void *freelist; | ||
1742 | void *nextfree; | 1729 | void *nextfree; |
1743 | int tail = DEACTIVATE_TO_HEAD; | 1730 | int tail = DEACTIVATE_TO_HEAD; |
1744 | struct page new; | 1731 | struct page new; |
@@ -1749,11 +1736,6 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1749 | tail = DEACTIVATE_TO_TAIL; | 1736 | tail = DEACTIVATE_TO_TAIL; |
1750 | } | 1737 | } |
1751 | 1738 | ||
1752 | c->tid = next_tid(c->tid); | ||
1753 | c->page = NULL; | ||
1754 | freelist = c->freelist; | ||
1755 | c->freelist = NULL; | ||
1756 | |||
1757 | /* | 1739 | /* |
1758 | * Stage one: Free all available per cpu objects back | 1740 | * Stage one: Free all available per cpu objects back |
1759 | * to the page freelist while it is still frozen. Leave the | 1741 | * to the page freelist while it is still frozen. Leave the |
@@ -1879,21 +1861,31 @@ redo: | |||
1879 | } | 1861 | } |
1880 | } | 1862 | } |
1881 | 1863 | ||
1882 | /* Unfreeze all the cpu partial slabs */ | 1864 | /* |
1865 | * Unfreeze all the cpu partial slabs. | ||
1866 | * | ||
1867 | * This function must be called with interrupt disabled. | ||
1868 | */ | ||
1883 | static void unfreeze_partials(struct kmem_cache *s) | 1869 | static void unfreeze_partials(struct kmem_cache *s) |
1884 | { | 1870 | { |
1885 | struct kmem_cache_node *n = NULL; | 1871 | struct kmem_cache_node *n = NULL, *n2 = NULL; |
1886 | struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); | 1872 | struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); |
1887 | struct page *page, *discard_page = NULL; | 1873 | struct page *page, *discard_page = NULL; |
1888 | 1874 | ||
1889 | while ((page = c->partial)) { | 1875 | while ((page = c->partial)) { |
1890 | enum slab_modes { M_PARTIAL, M_FREE }; | ||
1891 | enum slab_modes l, m; | ||
1892 | struct page new; | 1876 | struct page new; |
1893 | struct page old; | 1877 | struct page old; |
1894 | 1878 | ||
1895 | c->partial = page->next; | 1879 | c->partial = page->next; |
1896 | l = M_FREE; | 1880 | |
1881 | n2 = get_node(s, page_to_nid(page)); | ||
1882 | if (n != n2) { | ||
1883 | if (n) | ||
1884 | spin_unlock(&n->list_lock); | ||
1885 | |||
1886 | n = n2; | ||
1887 | spin_lock(&n->list_lock); | ||
1888 | } | ||
1897 | 1889 | ||
1898 | do { | 1890 | do { |
1899 | 1891 | ||
@@ -1906,43 +1898,17 @@ static void unfreeze_partials(struct kmem_cache *s) | |||
1906 | 1898 | ||
1907 | new.frozen = 0; | 1899 | new.frozen = 0; |
1908 | 1900 | ||
1909 | if (!new.inuse && (!n || n->nr_partial > s->min_partial)) | 1901 | } while (!__cmpxchg_double_slab(s, page, |
1910 | m = M_FREE; | ||
1911 | else { | ||
1912 | struct kmem_cache_node *n2 = get_node(s, | ||
1913 | page_to_nid(page)); | ||
1914 | |||
1915 | m = M_PARTIAL; | ||
1916 | if (n != n2) { | ||
1917 | if (n) | ||
1918 | spin_unlock(&n->list_lock); | ||
1919 | |||
1920 | n = n2; | ||
1921 | spin_lock(&n->list_lock); | ||
1922 | } | ||
1923 | } | ||
1924 | |||
1925 | if (l != m) { | ||
1926 | if (l == M_PARTIAL) { | ||
1927 | remove_partial(n, page); | ||
1928 | stat(s, FREE_REMOVE_PARTIAL); | ||
1929 | } else { | ||
1930 | add_partial(n, page, | ||
1931 | DEACTIVATE_TO_TAIL); | ||
1932 | stat(s, FREE_ADD_PARTIAL); | ||
1933 | } | ||
1934 | |||
1935 | l = m; | ||
1936 | } | ||
1937 | |||
1938 | } while (!cmpxchg_double_slab(s, page, | ||
1939 | old.freelist, old.counters, | 1902 | old.freelist, old.counters, |
1940 | new.freelist, new.counters, | 1903 | new.freelist, new.counters, |
1941 | "unfreezing slab")); | 1904 | "unfreezing slab")); |
1942 | 1905 | ||
1943 | if (m == M_FREE) { | 1906 | if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) { |
1944 | page->next = discard_page; | 1907 | page->next = discard_page; |
1945 | discard_page = page; | 1908 | discard_page = page; |
1909 | } else { | ||
1910 | add_partial(n, page, DEACTIVATE_TO_TAIL); | ||
1911 | stat(s, FREE_ADD_PARTIAL); | ||
1946 | } | 1912 | } |
1947 | } | 1913 | } |
1948 | 1914 | ||
@@ -2011,7 +1977,11 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) | |||
2011 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | 1977 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
2012 | { | 1978 | { |
2013 | stat(s, CPUSLAB_FLUSH); | 1979 | stat(s, CPUSLAB_FLUSH); |
2014 | deactivate_slab(s, c); | 1980 | deactivate_slab(s, c->page, c->freelist); |
1981 | |||
1982 | c->tid = next_tid(c->tid); | ||
1983 | c->page = NULL; | ||
1984 | c->freelist = NULL; | ||
2015 | } | 1985 | } |
2016 | 1986 | ||
2017 | /* | 1987 | /* |
@@ -2055,10 +2025,10 @@ static void flush_all(struct kmem_cache *s) | |||
2055 | * Check if the objects in a per cpu structure fit numa | 2025 | * Check if the objects in a per cpu structure fit numa |
2056 | * locality expectations. | 2026 | * locality expectations. |
2057 | */ | 2027 | */ |
2058 | static inline int node_match(struct kmem_cache_cpu *c, int node) | 2028 | static inline int node_match(struct page *page, int node) |
2059 | { | 2029 | { |
2060 | #ifdef CONFIG_NUMA | 2030 | #ifdef CONFIG_NUMA |
2061 | if (node != NUMA_NO_NODE && c->node != node) | 2031 | if (node != NUMA_NO_NODE && page_to_nid(page) != node) |
2062 | return 0; | 2032 | return 0; |
2063 | #endif | 2033 | #endif |
2064 | return 1; | 2034 | return 1; |
@@ -2101,10 +2071,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) | |||
2101 | "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", | 2071 | "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", |
2102 | nid, gfpflags); | 2072 | nid, gfpflags); |
2103 | printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " | 2073 | printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " |
2104 | "default order: %d, min order: %d\n", s->name, s->objsize, | 2074 | "default order: %d, min order: %d\n", s->name, s->object_size, |
2105 | s->size, oo_order(s->oo), oo_order(s->min)); | 2075 | s->size, oo_order(s->oo), oo_order(s->min)); |
2106 | 2076 | ||
2107 | if (oo_order(s->min) > get_order(s->objsize)) | 2077 | if (oo_order(s->min) > get_order(s->object_size)) |
2108 | printk(KERN_WARNING " %s debugging increased min order, use " | 2078 | printk(KERN_WARNING " %s debugging increased min order, use " |
2109 | "slub_debug=O to disable.\n", s->name); | 2079 | "slub_debug=O to disable.\n", s->name); |
2110 | 2080 | ||
@@ -2130,10 +2100,16 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) | |||
2130 | static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, | 2100 | static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, |
2131 | int node, struct kmem_cache_cpu **pc) | 2101 | int node, struct kmem_cache_cpu **pc) |
2132 | { | 2102 | { |
2133 | void *object; | 2103 | void *freelist; |
2134 | struct kmem_cache_cpu *c; | 2104 | struct kmem_cache_cpu *c = *pc; |
2135 | struct page *page = new_slab(s, flags, node); | 2105 | struct page *page; |
2136 | 2106 | ||
2107 | freelist = get_partial(s, flags, node, c); | ||
2108 | |||
2109 | if (freelist) | ||
2110 | return freelist; | ||
2111 | |||
2112 | page = new_slab(s, flags, node); | ||
2137 | if (page) { | 2113 | if (page) { |
2138 | c = __this_cpu_ptr(s->cpu_slab); | 2114 | c = __this_cpu_ptr(s->cpu_slab); |
2139 | if (c->page) | 2115 | if (c->page) |
@@ -2143,17 +2119,24 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, | |||
2143 | * No other reference to the page yet so we can | 2119 | * No other reference to the page yet so we can |
2144 | * muck around with it freely without cmpxchg | 2120 | * muck around with it freely without cmpxchg |
2145 | */ | 2121 | */ |
2146 | object = page->freelist; | 2122 | freelist = page->freelist; |
2147 | page->freelist = NULL; | 2123 | page->freelist = NULL; |
2148 | 2124 | ||
2149 | stat(s, ALLOC_SLAB); | 2125 | stat(s, ALLOC_SLAB); |
2150 | c->node = page_to_nid(page); | ||
2151 | c->page = page; | 2126 | c->page = page; |
2152 | *pc = c; | 2127 | *pc = c; |
2153 | } else | 2128 | } else |
2154 | object = NULL; | 2129 | freelist = NULL; |
2155 | 2130 | ||
2156 | return object; | 2131 | return freelist; |
2132 | } | ||
2133 | |||
2134 | static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags) | ||
2135 | { | ||
2136 | if (unlikely(PageSlabPfmemalloc(page))) | ||
2137 | return gfp_pfmemalloc_allowed(gfpflags); | ||
2138 | |||
2139 | return true; | ||
2157 | } | 2140 | } |
2158 | 2141 | ||
2159 | /* | 2142 | /* |
@@ -2163,6 +2146,8 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, | |||
2163 | * The page is still frozen if the return value is not NULL. | 2146 | * The page is still frozen if the return value is not NULL. |
2164 | * | 2147 | * |
2165 | * If this function returns NULL then the page has been unfrozen. | 2148 | * If this function returns NULL then the page has been unfrozen. |
2149 | * | ||
2150 | * This function must be called with interrupt disabled. | ||
2166 | */ | 2151 | */ |
2167 | static inline void *get_freelist(struct kmem_cache *s, struct page *page) | 2152 | static inline void *get_freelist(struct kmem_cache *s, struct page *page) |
2168 | { | 2153 | { |
@@ -2173,13 +2158,14 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) | |||
2173 | do { | 2158 | do { |
2174 | freelist = page->freelist; | 2159 | freelist = page->freelist; |
2175 | counters = page->counters; | 2160 | counters = page->counters; |
2161 | |||
2176 | new.counters = counters; | 2162 | new.counters = counters; |
2177 | VM_BUG_ON(!new.frozen); | 2163 | VM_BUG_ON(!new.frozen); |
2178 | 2164 | ||
2179 | new.inuse = page->objects; | 2165 | new.inuse = page->objects; |
2180 | new.frozen = freelist != NULL; | 2166 | new.frozen = freelist != NULL; |
2181 | 2167 | ||
2182 | } while (!cmpxchg_double_slab(s, page, | 2168 | } while (!__cmpxchg_double_slab(s, page, |
2183 | freelist, counters, | 2169 | freelist, counters, |
2184 | NULL, new.counters, | 2170 | NULL, new.counters, |
2185 | "get_freelist")); | 2171 | "get_freelist")); |
@@ -2206,7 +2192,8 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) | |||
2206 | static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | 2192 | static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, |
2207 | unsigned long addr, struct kmem_cache_cpu *c) | 2193 | unsigned long addr, struct kmem_cache_cpu *c) |
2208 | { | 2194 | { |
2209 | void **object; | 2195 | void *freelist; |
2196 | struct page *page; | ||
2210 | unsigned long flags; | 2197 | unsigned long flags; |
2211 | 2198 | ||
2212 | local_irq_save(flags); | 2199 | local_irq_save(flags); |
@@ -2219,25 +2206,41 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |||
2219 | c = this_cpu_ptr(s->cpu_slab); | 2206 | c = this_cpu_ptr(s->cpu_slab); |
2220 | #endif | 2207 | #endif |
2221 | 2208 | ||
2222 | if (!c->page) | 2209 | page = c->page; |
2210 | if (!page) | ||
2223 | goto new_slab; | 2211 | goto new_slab; |
2224 | redo: | 2212 | redo: |
2225 | if (unlikely(!node_match(c, node))) { | 2213 | |
2214 | if (unlikely(!node_match(page, node))) { | ||
2226 | stat(s, ALLOC_NODE_MISMATCH); | 2215 | stat(s, ALLOC_NODE_MISMATCH); |
2227 | deactivate_slab(s, c); | 2216 | deactivate_slab(s, page, c->freelist); |
2217 | c->page = NULL; | ||
2218 | c->freelist = NULL; | ||
2219 | goto new_slab; | ||
2220 | } | ||
2221 | |||
2222 | /* | ||
2223 | * By rights, we should be searching for a slab page that was | ||
2224 | * PFMEMALLOC but right now, we are losing the pfmemalloc | ||
2225 | * information when the page leaves the per-cpu allocator | ||
2226 | */ | ||
2227 | if (unlikely(!pfmemalloc_match(page, gfpflags))) { | ||
2228 | deactivate_slab(s, page, c->freelist); | ||
2229 | c->page = NULL; | ||
2230 | c->freelist = NULL; | ||
2228 | goto new_slab; | 2231 | goto new_slab; |
2229 | } | 2232 | } |
2230 | 2233 | ||
2231 | /* must check again c->freelist in case of cpu migration or IRQ */ | 2234 | /* must check again c->freelist in case of cpu migration or IRQ */ |
2232 | object = c->freelist; | 2235 | freelist = c->freelist; |
2233 | if (object) | 2236 | if (freelist) |
2234 | goto load_freelist; | 2237 | goto load_freelist; |
2235 | 2238 | ||
2236 | stat(s, ALLOC_SLOWPATH); | 2239 | stat(s, ALLOC_SLOWPATH); |
2237 | 2240 | ||
2238 | object = get_freelist(s, c->page); | 2241 | freelist = get_freelist(s, page); |
2239 | 2242 | ||
2240 | if (!object) { | 2243 | if (!freelist) { |
2241 | c->page = NULL; | 2244 | c->page = NULL; |
2242 | stat(s, DEACTIVATE_BYPASS); | 2245 | stat(s, DEACTIVATE_BYPASS); |
2243 | goto new_slab; | 2246 | goto new_slab; |
@@ -2246,50 +2249,50 @@ redo: | |||
2246 | stat(s, ALLOC_REFILL); | 2249 | stat(s, ALLOC_REFILL); |
2247 | 2250 | ||
2248 | load_freelist: | 2251 | load_freelist: |
2249 | c->freelist = get_freepointer(s, object); | 2252 | /* |
2253 | * freelist is pointing to the list of objects to be used. | ||
2254 | * page is pointing to the page from which the objects are obtained. | ||
2255 | * That page must be frozen for per cpu allocations to work. | ||
2256 | */ | ||
2257 | VM_BUG_ON(!c->page->frozen); | ||
2258 | c->freelist = get_freepointer(s, freelist); | ||
2250 | c->tid = next_tid(c->tid); | 2259 | c->tid = next_tid(c->tid); |
2251 | local_irq_restore(flags); | 2260 | local_irq_restore(flags); |
2252 | return object; | 2261 | return freelist; |
2253 | 2262 | ||
2254 | new_slab: | 2263 | new_slab: |
2255 | 2264 | ||
2256 | if (c->partial) { | 2265 | if (c->partial) { |
2257 | c->page = c->partial; | 2266 | page = c->page = c->partial; |
2258 | c->partial = c->page->next; | 2267 | c->partial = page->next; |
2259 | c->node = page_to_nid(c->page); | ||
2260 | stat(s, CPU_PARTIAL_ALLOC); | 2268 | stat(s, CPU_PARTIAL_ALLOC); |
2261 | c->freelist = NULL; | 2269 | c->freelist = NULL; |
2262 | goto redo; | 2270 | goto redo; |
2263 | } | 2271 | } |
2264 | 2272 | ||
2265 | /* Then do expensive stuff like retrieving pages from the partial lists */ | 2273 | freelist = new_slab_objects(s, gfpflags, node, &c); |
2266 | object = get_partial(s, gfpflags, node, c); | ||
2267 | |||
2268 | if (unlikely(!object)) { | ||
2269 | 2274 | ||
2270 | object = new_slab_objects(s, gfpflags, node, &c); | 2275 | if (unlikely(!freelist)) { |
2276 | if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) | ||
2277 | slab_out_of_memory(s, gfpflags, node); | ||
2271 | 2278 | ||
2272 | if (unlikely(!object)) { | 2279 | local_irq_restore(flags); |
2273 | if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) | 2280 | return NULL; |
2274 | slab_out_of_memory(s, gfpflags, node); | ||
2275 | |||
2276 | local_irq_restore(flags); | ||
2277 | return NULL; | ||
2278 | } | ||
2279 | } | 2281 | } |
2280 | 2282 | ||
2281 | if (likely(!kmem_cache_debug(s))) | 2283 | page = c->page; |
2284 | if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags))) | ||
2282 | goto load_freelist; | 2285 | goto load_freelist; |
2283 | 2286 | ||
2284 | /* Only entered in the debug case */ | 2287 | /* Only entered in the debug case */ |
2285 | if (!alloc_debug_processing(s, c->page, object, addr)) | 2288 | if (kmem_cache_debug(s) && !alloc_debug_processing(s, page, freelist, addr)) |
2286 | goto new_slab; /* Slab failed checks. Next slab needed */ | 2289 | goto new_slab; /* Slab failed checks. Next slab needed */ |
2287 | 2290 | ||
2288 | c->freelist = get_freepointer(s, object); | 2291 | deactivate_slab(s, page, get_freepointer(s, freelist)); |
2289 | deactivate_slab(s, c); | 2292 | c->page = NULL; |
2290 | c->node = NUMA_NO_NODE; | 2293 | c->freelist = NULL; |
2291 | local_irq_restore(flags); | 2294 | local_irq_restore(flags); |
2292 | return object; | 2295 | return freelist; |
2293 | } | 2296 | } |
2294 | 2297 | ||
2295 | /* | 2298 | /* |
@@ -2307,6 +2310,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
2307 | { | 2310 | { |
2308 | void **object; | 2311 | void **object; |
2309 | struct kmem_cache_cpu *c; | 2312 | struct kmem_cache_cpu *c; |
2313 | struct page *page; | ||
2310 | unsigned long tid; | 2314 | unsigned long tid; |
2311 | 2315 | ||
2312 | if (slab_pre_alloc_hook(s, gfpflags)) | 2316 | if (slab_pre_alloc_hook(s, gfpflags)) |
@@ -2332,8 +2336,8 @@ redo: | |||
2332 | barrier(); | 2336 | barrier(); |
2333 | 2337 | ||
2334 | object = c->freelist; | 2338 | object = c->freelist; |
2335 | if (unlikely(!object || !node_match(c, node))) | 2339 | page = c->page; |
2336 | 2340 | if (unlikely(!object || !node_match(page, node))) | |
2337 | object = __slab_alloc(s, gfpflags, node, addr, c); | 2341 | object = __slab_alloc(s, gfpflags, node, addr, c); |
2338 | 2342 | ||
2339 | else { | 2343 | else { |
@@ -2364,7 +2368,7 @@ redo: | |||
2364 | } | 2368 | } |
2365 | 2369 | ||
2366 | if (unlikely(gfpflags & __GFP_ZERO) && object) | 2370 | if (unlikely(gfpflags & __GFP_ZERO) && object) |
2367 | memset(object, 0, s->objsize); | 2371 | memset(object, 0, s->object_size); |
2368 | 2372 | ||
2369 | slab_post_alloc_hook(s, gfpflags, object); | 2373 | slab_post_alloc_hook(s, gfpflags, object); |
2370 | 2374 | ||
@@ -2375,7 +2379,7 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) | |||
2375 | { | 2379 | { |
2376 | void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); | 2380 | void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); |
2377 | 2381 | ||
2378 | trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); | 2382 | trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags); |
2379 | 2383 | ||
2380 | return ret; | 2384 | return ret; |
2381 | } | 2385 | } |
@@ -2405,7 +2409,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) | |||
2405 | void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); | 2409 | void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); |
2406 | 2410 | ||
2407 | trace_kmem_cache_alloc_node(_RET_IP_, ret, | 2411 | trace_kmem_cache_alloc_node(_RET_IP_, ret, |
2408 | s->objsize, s->size, gfpflags, node); | 2412 | s->object_size, s->size, gfpflags, node); |
2409 | 2413 | ||
2410 | return ret; | 2414 | return ret; |
2411 | } | 2415 | } |
@@ -2900,7 +2904,7 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min) | |||
2900 | static int calculate_sizes(struct kmem_cache *s, int forced_order) | 2904 | static int calculate_sizes(struct kmem_cache *s, int forced_order) |
2901 | { | 2905 | { |
2902 | unsigned long flags = s->flags; | 2906 | unsigned long flags = s->flags; |
2903 | unsigned long size = s->objsize; | 2907 | unsigned long size = s->object_size; |
2904 | unsigned long align = s->align; | 2908 | unsigned long align = s->align; |
2905 | int order; | 2909 | int order; |
2906 | 2910 | ||
@@ -2929,7 +2933,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
2929 | * end of the object and the free pointer. If not then add an | 2933 | * end of the object and the free pointer. If not then add an |
2930 | * additional word to have some bytes to store Redzone information. | 2934 | * additional word to have some bytes to store Redzone information. |
2931 | */ | 2935 | */ |
2932 | if ((flags & SLAB_RED_ZONE) && size == s->objsize) | 2936 | if ((flags & SLAB_RED_ZONE) && size == s->object_size) |
2933 | size += sizeof(void *); | 2937 | size += sizeof(void *); |
2934 | #endif | 2938 | #endif |
2935 | 2939 | ||
@@ -2977,7 +2981,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
2977 | * user specified and the dynamic determination of cache line size | 2981 | * user specified and the dynamic determination of cache line size |
2978 | * on bootup. | 2982 | * on bootup. |
2979 | */ | 2983 | */ |
2980 | align = calculate_alignment(flags, align, s->objsize); | 2984 | align = calculate_alignment(flags, align, s->object_size); |
2981 | s->align = align; | 2985 | s->align = align; |
2982 | 2986 | ||
2983 | /* | 2987 | /* |
@@ -3025,7 +3029,7 @@ static int kmem_cache_open(struct kmem_cache *s, | |||
3025 | memset(s, 0, kmem_size); | 3029 | memset(s, 0, kmem_size); |
3026 | s->name = name; | 3030 | s->name = name; |
3027 | s->ctor = ctor; | 3031 | s->ctor = ctor; |
3028 | s->objsize = size; | 3032 | s->object_size = size; |
3029 | s->align = align; | 3033 | s->align = align; |
3030 | s->flags = kmem_cache_flags(size, flags, name, ctor); | 3034 | s->flags = kmem_cache_flags(size, flags, name, ctor); |
3031 | s->reserved = 0; | 3035 | s->reserved = 0; |
@@ -3040,7 +3044,7 @@ static int kmem_cache_open(struct kmem_cache *s, | |||
3040 | * Disable debugging flags that store metadata if the min slab | 3044 | * Disable debugging flags that store metadata if the min slab |
3041 | * order increased. | 3045 | * order increased. |
3042 | */ | 3046 | */ |
3043 | if (get_order(s->size) > get_order(s->objsize)) { | 3047 | if (get_order(s->size) > get_order(s->object_size)) { |
3044 | s->flags &= ~DEBUG_METADATA_FLAGS; | 3048 | s->flags &= ~DEBUG_METADATA_FLAGS; |
3045 | s->offset = 0; | 3049 | s->offset = 0; |
3046 | if (!calculate_sizes(s, -1)) | 3050 | if (!calculate_sizes(s, -1)) |
@@ -3114,7 +3118,7 @@ error: | |||
3114 | */ | 3118 | */ |
3115 | unsigned int kmem_cache_size(struct kmem_cache *s) | 3119 | unsigned int kmem_cache_size(struct kmem_cache *s) |
3116 | { | 3120 | { |
3117 | return s->objsize; | 3121 | return s->object_size; |
3118 | } | 3122 | } |
3119 | EXPORT_SYMBOL(kmem_cache_size); | 3123 | EXPORT_SYMBOL(kmem_cache_size); |
3120 | 3124 | ||
@@ -3192,11 +3196,11 @@ static inline int kmem_cache_close(struct kmem_cache *s) | |||
3192 | */ | 3196 | */ |
3193 | void kmem_cache_destroy(struct kmem_cache *s) | 3197 | void kmem_cache_destroy(struct kmem_cache *s) |
3194 | { | 3198 | { |
3195 | down_write(&slub_lock); | 3199 | mutex_lock(&slab_mutex); |
3196 | s->refcount--; | 3200 | s->refcount--; |
3197 | if (!s->refcount) { | 3201 | if (!s->refcount) { |
3198 | list_del(&s->list); | 3202 | list_del(&s->list); |
3199 | up_write(&slub_lock); | 3203 | mutex_unlock(&slab_mutex); |
3200 | if (kmem_cache_close(s)) { | 3204 | if (kmem_cache_close(s)) { |
3201 | printk(KERN_ERR "SLUB %s: %s called for cache that " | 3205 | printk(KERN_ERR "SLUB %s: %s called for cache that " |
3202 | "still has objects.\n", s->name, __func__); | 3206 | "still has objects.\n", s->name, __func__); |
@@ -3206,7 +3210,7 @@ void kmem_cache_destroy(struct kmem_cache *s) | |||
3206 | rcu_barrier(); | 3210 | rcu_barrier(); |
3207 | sysfs_slab_remove(s); | 3211 | sysfs_slab_remove(s); |
3208 | } else | 3212 | } else |
3209 | up_write(&slub_lock); | 3213 | mutex_unlock(&slab_mutex); |
3210 | } | 3214 | } |
3211 | EXPORT_SYMBOL(kmem_cache_destroy); | 3215 | EXPORT_SYMBOL(kmem_cache_destroy); |
3212 | 3216 | ||
@@ -3268,7 +3272,7 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name, | |||
3268 | 3272 | ||
3269 | /* | 3273 | /* |
3270 | * This function is called with IRQs disabled during early-boot on | 3274 | * This function is called with IRQs disabled during early-boot on |
3271 | * single CPU so there's no need to take slub_lock here. | 3275 | * single CPU so there's no need to take slab_mutex here. |
3272 | */ | 3276 | */ |
3273 | if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, | 3277 | if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, |
3274 | flags, NULL)) | 3278 | flags, NULL)) |
@@ -3553,10 +3557,10 @@ static int slab_mem_going_offline_callback(void *arg) | |||
3553 | { | 3557 | { |
3554 | struct kmem_cache *s; | 3558 | struct kmem_cache *s; |
3555 | 3559 | ||
3556 | down_read(&slub_lock); | 3560 | mutex_lock(&slab_mutex); |
3557 | list_for_each_entry(s, &slab_caches, list) | 3561 | list_for_each_entry(s, &slab_caches, list) |
3558 | kmem_cache_shrink(s); | 3562 | kmem_cache_shrink(s); |
3559 | up_read(&slub_lock); | 3563 | mutex_unlock(&slab_mutex); |
3560 | 3564 | ||
3561 | return 0; | 3565 | return 0; |
3562 | } | 3566 | } |
@@ -3577,7 +3581,7 @@ static void slab_mem_offline_callback(void *arg) | |||
3577 | if (offline_node < 0) | 3581 | if (offline_node < 0) |
3578 | return; | 3582 | return; |
3579 | 3583 | ||
3580 | down_read(&slub_lock); | 3584 | mutex_lock(&slab_mutex); |
3581 | list_for_each_entry(s, &slab_caches, list) { | 3585 | list_for_each_entry(s, &slab_caches, list) { |
3582 | n = get_node(s, offline_node); | 3586 | n = get_node(s, offline_node); |
3583 | if (n) { | 3587 | if (n) { |
@@ -3593,7 +3597,7 @@ static void slab_mem_offline_callback(void *arg) | |||
3593 | kmem_cache_free(kmem_cache_node, n); | 3597 | kmem_cache_free(kmem_cache_node, n); |
3594 | } | 3598 | } |
3595 | } | 3599 | } |
3596 | up_read(&slub_lock); | 3600 | mutex_unlock(&slab_mutex); |
3597 | } | 3601 | } |
3598 | 3602 | ||
3599 | static int slab_mem_going_online_callback(void *arg) | 3603 | static int slab_mem_going_online_callback(void *arg) |
@@ -3616,7 +3620,7 @@ static int slab_mem_going_online_callback(void *arg) | |||
3616 | * allocate a kmem_cache_node structure in order to bring the node | 3620 | * allocate a kmem_cache_node structure in order to bring the node |
3617 | * online. | 3621 | * online. |
3618 | */ | 3622 | */ |
3619 | down_read(&slub_lock); | 3623 | mutex_lock(&slab_mutex); |
3620 | list_for_each_entry(s, &slab_caches, list) { | 3624 | list_for_each_entry(s, &slab_caches, list) { |
3621 | /* | 3625 | /* |
3622 | * XXX: kmem_cache_alloc_node will fallback to other nodes | 3626 | * XXX: kmem_cache_alloc_node will fallback to other nodes |
@@ -3632,7 +3636,7 @@ static int slab_mem_going_online_callback(void *arg) | |||
3632 | s->node[nid] = n; | 3636 | s->node[nid] = n; |
3633 | } | 3637 | } |
3634 | out: | 3638 | out: |
3635 | up_read(&slub_lock); | 3639 | mutex_unlock(&slab_mutex); |
3636 | return ret; | 3640 | return ret; |
3637 | } | 3641 | } |
3638 | 3642 | ||
@@ -3843,11 +3847,11 @@ void __init kmem_cache_init(void) | |||
3843 | 3847 | ||
3844 | if (s && s->size) { | 3848 | if (s && s->size) { |
3845 | char *name = kasprintf(GFP_NOWAIT, | 3849 | char *name = kasprintf(GFP_NOWAIT, |
3846 | "dma-kmalloc-%d", s->objsize); | 3850 | "dma-kmalloc-%d", s->object_size); |
3847 | 3851 | ||
3848 | BUG_ON(!name); | 3852 | BUG_ON(!name); |
3849 | kmalloc_dma_caches[i] = create_kmalloc_cache(name, | 3853 | kmalloc_dma_caches[i] = create_kmalloc_cache(name, |
3850 | s->objsize, SLAB_CACHE_DMA); | 3854 | s->object_size, SLAB_CACHE_DMA); |
3851 | } | 3855 | } |
3852 | } | 3856 | } |
3853 | #endif | 3857 | #endif |
@@ -3924,16 +3928,12 @@ static struct kmem_cache *find_mergeable(size_t size, | |||
3924 | return NULL; | 3928 | return NULL; |
3925 | } | 3929 | } |
3926 | 3930 | ||
3927 | struct kmem_cache *kmem_cache_create(const char *name, size_t size, | 3931 | struct kmem_cache *__kmem_cache_create(const char *name, size_t size, |
3928 | size_t align, unsigned long flags, void (*ctor)(void *)) | 3932 | size_t align, unsigned long flags, void (*ctor)(void *)) |
3929 | { | 3933 | { |
3930 | struct kmem_cache *s; | 3934 | struct kmem_cache *s; |
3931 | char *n; | 3935 | char *n; |
3932 | 3936 | ||
3933 | if (WARN_ON(!name)) | ||
3934 | return NULL; | ||
3935 | |||
3936 | down_write(&slub_lock); | ||
3937 | s = find_mergeable(size, align, flags, name, ctor); | 3937 | s = find_mergeable(size, align, flags, name, ctor); |
3938 | if (s) { | 3938 | if (s) { |
3939 | s->refcount++; | 3939 | s->refcount++; |
@@ -3941,49 +3941,42 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, | |||
3941 | * Adjust the object sizes so that we clear | 3941 | * Adjust the object sizes so that we clear |
3942 | * the complete object on kzalloc. | 3942 | * the complete object on kzalloc. |
3943 | */ | 3943 | */ |
3944 | s->objsize = max(s->objsize, (int)size); | 3944 | s->object_size = max(s->object_size, (int)size); |
3945 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); | 3945 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); |
3946 | 3946 | ||
3947 | if (sysfs_slab_alias(s, name)) { | 3947 | if (sysfs_slab_alias(s, name)) { |
3948 | s->refcount--; | 3948 | s->refcount--; |
3949 | goto err; | 3949 | return NULL; |
3950 | } | 3950 | } |
3951 | up_write(&slub_lock); | ||
3952 | return s; | 3951 | return s; |
3953 | } | 3952 | } |
3954 | 3953 | ||
3955 | n = kstrdup(name, GFP_KERNEL); | 3954 | n = kstrdup(name, GFP_KERNEL); |
3956 | if (!n) | 3955 | if (!n) |
3957 | goto err; | 3956 | return NULL; |
3958 | 3957 | ||
3959 | s = kmalloc(kmem_size, GFP_KERNEL); | 3958 | s = kmalloc(kmem_size, GFP_KERNEL); |
3960 | if (s) { | 3959 | if (s) { |
3961 | if (kmem_cache_open(s, n, | 3960 | if (kmem_cache_open(s, n, |
3962 | size, align, flags, ctor)) { | 3961 | size, align, flags, ctor)) { |
3962 | int r; | ||
3963 | |||
3963 | list_add(&s->list, &slab_caches); | 3964 | list_add(&s->list, &slab_caches); |
3964 | up_write(&slub_lock); | 3965 | mutex_unlock(&slab_mutex); |
3965 | if (sysfs_slab_add(s)) { | 3966 | r = sysfs_slab_add(s); |
3966 | down_write(&slub_lock); | 3967 | mutex_lock(&slab_mutex); |
3967 | list_del(&s->list); | 3968 | |
3968 | kfree(n); | 3969 | if (!r) |
3969 | kfree(s); | 3970 | return s; |
3970 | goto err; | 3971 | |
3971 | } | 3972 | list_del(&s->list); |
3972 | return s; | 3973 | kmem_cache_close(s); |
3973 | } | 3974 | } |
3974 | kfree(s); | 3975 | kfree(s); |
3975 | } | 3976 | } |
3976 | kfree(n); | 3977 | kfree(n); |
3977 | err: | 3978 | return NULL; |
3978 | up_write(&slub_lock); | ||
3979 | |||
3980 | if (flags & SLAB_PANIC) | ||
3981 | panic("Cannot create slabcache %s\n", name); | ||
3982 | else | ||
3983 | s = NULL; | ||
3984 | return s; | ||
3985 | } | 3979 | } |
3986 | EXPORT_SYMBOL(kmem_cache_create); | ||
3987 | 3980 | ||
3988 | #ifdef CONFIG_SMP | 3981 | #ifdef CONFIG_SMP |
3989 | /* | 3982 | /* |
@@ -4002,13 +3995,13 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, | |||
4002 | case CPU_UP_CANCELED_FROZEN: | 3995 | case CPU_UP_CANCELED_FROZEN: |
4003 | case CPU_DEAD: | 3996 | case CPU_DEAD: |
4004 | case CPU_DEAD_FROZEN: | 3997 | case CPU_DEAD_FROZEN: |
4005 | down_read(&slub_lock); | 3998 | mutex_lock(&slab_mutex); |
4006 | list_for_each_entry(s, &slab_caches, list) { | 3999 | list_for_each_entry(s, &slab_caches, list) { |
4007 | local_irq_save(flags); | 4000 | local_irq_save(flags); |
4008 | __flush_cpu_slab(s, cpu); | 4001 | __flush_cpu_slab(s, cpu); |
4009 | local_irq_restore(flags); | 4002 | local_irq_restore(flags); |
4010 | } | 4003 | } |
4011 | up_read(&slub_lock); | 4004 | mutex_unlock(&slab_mutex); |
4012 | break; | 4005 | break; |
4013 | default: | 4006 | default: |
4014 | break; | 4007 | break; |
@@ -4500,30 +4493,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s, | |||
4500 | 4493 | ||
4501 | for_each_possible_cpu(cpu) { | 4494 | for_each_possible_cpu(cpu) { |
4502 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); | 4495 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); |
4503 | int node = ACCESS_ONCE(c->node); | 4496 | int node; |
4504 | struct page *page; | 4497 | struct page *page; |
4505 | 4498 | ||
4506 | if (node < 0) | ||
4507 | continue; | ||
4508 | page = ACCESS_ONCE(c->page); | 4499 | page = ACCESS_ONCE(c->page); |
4509 | if (page) { | 4500 | if (!page) |
4510 | if (flags & SO_TOTAL) | 4501 | continue; |
4511 | x = page->objects; | ||
4512 | else if (flags & SO_OBJECTS) | ||
4513 | x = page->inuse; | ||
4514 | else | ||
4515 | x = 1; | ||
4516 | 4502 | ||
4517 | total += x; | 4503 | node = page_to_nid(page); |
4518 | nodes[node] += x; | 4504 | if (flags & SO_TOTAL) |
4519 | } | 4505 | x = page->objects; |
4520 | page = c->partial; | 4506 | else if (flags & SO_OBJECTS) |
4507 | x = page->inuse; | ||
4508 | else | ||
4509 | x = 1; | ||
4521 | 4510 | ||
4511 | total += x; | ||
4512 | nodes[node] += x; | ||
4513 | |||
4514 | page = ACCESS_ONCE(c->partial); | ||
4522 | if (page) { | 4515 | if (page) { |
4523 | x = page->pobjects; | 4516 | x = page->pobjects; |
4524 | total += x; | 4517 | total += x; |
4525 | nodes[node] += x; | 4518 | nodes[node] += x; |
4526 | } | 4519 | } |
4520 | |||
4527 | per_cpu[node]++; | 4521 | per_cpu[node]++; |
4528 | } | 4522 | } |
4529 | } | 4523 | } |
@@ -4623,7 +4617,7 @@ SLAB_ATTR_RO(align); | |||
4623 | 4617 | ||
4624 | static ssize_t object_size_show(struct kmem_cache *s, char *buf) | 4618 | static ssize_t object_size_show(struct kmem_cache *s, char *buf) |
4625 | { | 4619 | { |
4626 | return sprintf(buf, "%d\n", s->objsize); | 4620 | return sprintf(buf, "%d\n", s->object_size); |
4627 | } | 4621 | } |
4628 | SLAB_ATTR_RO(object_size); | 4622 | SLAB_ATTR_RO(object_size); |
4629 | 4623 | ||
@@ -5286,7 +5280,7 @@ static int sysfs_slab_add(struct kmem_cache *s) | |||
5286 | const char *name; | 5280 | const char *name; |
5287 | int unmergeable; | 5281 | int unmergeable; |
5288 | 5282 | ||
5289 | if (slab_state < SYSFS) | 5283 | if (slab_state < FULL) |
5290 | /* Defer until later */ | 5284 | /* Defer until later */ |
5291 | return 0; | 5285 | return 0; |
5292 | 5286 | ||
@@ -5331,7 +5325,7 @@ static int sysfs_slab_add(struct kmem_cache *s) | |||
5331 | 5325 | ||
5332 | static void sysfs_slab_remove(struct kmem_cache *s) | 5326 | static void sysfs_slab_remove(struct kmem_cache *s) |
5333 | { | 5327 | { |
5334 | if (slab_state < SYSFS) | 5328 | if (slab_state < FULL) |
5335 | /* | 5329 | /* |
5336 | * Sysfs has not been setup yet so no need to remove the | 5330 | * Sysfs has not been setup yet so no need to remove the |
5337 | * cache from sysfs. | 5331 | * cache from sysfs. |
@@ -5359,7 +5353,7 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name) | |||
5359 | { | 5353 | { |
5360 | struct saved_alias *al; | 5354 | struct saved_alias *al; |
5361 | 5355 | ||
5362 | if (slab_state == SYSFS) { | 5356 | if (slab_state == FULL) { |
5363 | /* | 5357 | /* |
5364 | * If we have a leftover link then remove it. | 5358 | * If we have a leftover link then remove it. |
5365 | */ | 5359 | */ |
@@ -5383,16 +5377,16 @@ static int __init slab_sysfs_init(void) | |||
5383 | struct kmem_cache *s; | 5377 | struct kmem_cache *s; |
5384 | int err; | 5378 | int err; |
5385 | 5379 | ||
5386 | down_write(&slub_lock); | 5380 | mutex_lock(&slab_mutex); |
5387 | 5381 | ||
5388 | slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); | 5382 | slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); |
5389 | if (!slab_kset) { | 5383 | if (!slab_kset) { |
5390 | up_write(&slub_lock); | 5384 | mutex_unlock(&slab_mutex); |
5391 | printk(KERN_ERR "Cannot register slab subsystem.\n"); | 5385 | printk(KERN_ERR "Cannot register slab subsystem.\n"); |
5392 | return -ENOSYS; | 5386 | return -ENOSYS; |
5393 | } | 5387 | } |
5394 | 5388 | ||
5395 | slab_state = SYSFS; | 5389 | slab_state = FULL; |
5396 | 5390 | ||
5397 | list_for_each_entry(s, &slab_caches, list) { | 5391 | list_for_each_entry(s, &slab_caches, list) { |
5398 | err = sysfs_slab_add(s); | 5392 | err = sysfs_slab_add(s); |
@@ -5408,11 +5402,11 @@ static int __init slab_sysfs_init(void) | |||
5408 | err = sysfs_slab_alias(al->s, al->name); | 5402 | err = sysfs_slab_alias(al->s, al->name); |
5409 | if (err) | 5403 | if (err) |
5410 | printk(KERN_ERR "SLUB: Unable to add boot slab alias" | 5404 | printk(KERN_ERR "SLUB: Unable to add boot slab alias" |
5411 | " %s to sysfs\n", s->name); | 5405 | " %s to sysfs\n", al->name); |
5412 | kfree(al); | 5406 | kfree(al); |
5413 | } | 5407 | } |
5414 | 5408 | ||
5415 | up_write(&slub_lock); | 5409 | mutex_unlock(&slab_mutex); |
5416 | resiliency_test(); | 5410 | resiliency_test(); |
5417 | return 0; | 5411 | return 0; |
5418 | } | 5412 | } |
@@ -5427,7 +5421,7 @@ __initcall(slab_sysfs_init); | |||
5427 | static void print_slabinfo_header(struct seq_file *m) | 5421 | static void print_slabinfo_header(struct seq_file *m) |
5428 | { | 5422 | { |
5429 | seq_puts(m, "slabinfo - version: 2.1\n"); | 5423 | seq_puts(m, "slabinfo - version: 2.1\n"); |
5430 | seq_puts(m, "# name <active_objs> <num_objs> <objsize> " | 5424 | seq_puts(m, "# name <active_objs> <num_objs> <object_size> " |
5431 | "<objperslab> <pagesperslab>"); | 5425 | "<objperslab> <pagesperslab>"); |
5432 | seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); | 5426 | seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); |
5433 | seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); | 5427 | seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); |
@@ -5438,7 +5432,7 @@ static void *s_start(struct seq_file *m, loff_t *pos) | |||
5438 | { | 5432 | { |
5439 | loff_t n = *pos; | 5433 | loff_t n = *pos; |
5440 | 5434 | ||
5441 | down_read(&slub_lock); | 5435 | mutex_lock(&slab_mutex); |
5442 | if (!n) | 5436 | if (!n) |
5443 | print_slabinfo_header(m); | 5437 | print_slabinfo_header(m); |
5444 | 5438 | ||
@@ -5452,7 +5446,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos) | |||
5452 | 5446 | ||
5453 | static void s_stop(struct seq_file *m, void *p) | 5447 | static void s_stop(struct seq_file *m, void *p) |
5454 | { | 5448 | { |
5455 | up_read(&slub_lock); | 5449 | mutex_unlock(&slab_mutex); |
5456 | } | 5450 | } |
5457 | 5451 | ||
5458 | static int s_show(struct seq_file *m, void *p) | 5452 | static int s_show(struct seq_file *m, void *p) |