diff options
author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
commit | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch) | |
tree | a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /mm/slub.c | |
parent | 406089d01562f1e2bf9f089fd7637009ebaad589 (diff) |
Patched in Tegra support.
Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 1598 |
1 files changed, 711 insertions, 887 deletions
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/bitops.h> | 17 | #include <linux/bitops.h> |
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include "slab.h" | ||
20 | #include <linux/proc_fs.h> | 19 | #include <linux/proc_fs.h> |
21 | #include <linux/seq_file.h> | 20 | #include <linux/seq_file.h> |
22 | #include <linux/kmemcheck.h> | 21 | #include <linux/kmemcheck.h> |
@@ -30,22 +29,18 @@ | |||
30 | #include <linux/math64.h> | 29 | #include <linux/math64.h> |
31 | #include <linux/fault-inject.h> | 30 | #include <linux/fault-inject.h> |
32 | #include <linux/stacktrace.h> | 31 | #include <linux/stacktrace.h> |
33 | #include <linux/prefetch.h> | ||
34 | #include <linux/memcontrol.h> | ||
35 | 32 | ||
36 | #include <trace/events/kmem.h> | 33 | #include <trace/events/kmem.h> |
37 | 34 | ||
38 | #include "internal.h" | ||
39 | |||
40 | /* | 35 | /* |
41 | * Lock order: | 36 | * Lock order: |
42 | * 1. slab_mutex (Global Mutex) | 37 | * 1. slub_lock (Global Semaphore) |
43 | * 2. node->list_lock | 38 | * 2. node->list_lock |
44 | * 3. slab_lock(page) (Only on some arches and for debugging) | 39 | * 3. slab_lock(page) (Only on some arches and for debugging) |
45 | * | 40 | * |
46 | * slab_mutex | 41 | * slub_lock |
47 | * | 42 | * |
48 | * The role of the slab_mutex is to protect the list of all the slabs | 43 | * The role of the slub_lock is to protect the list of all the slabs |
49 | * and to synchronize major metadata changes to slab cache structures. | 44 | * and to synchronize major metadata changes to slab cache structures. |
50 | * | 45 | * |
51 | * The slab_lock is only used for debugging and on arches that do not | 46 | * The slab_lock is only used for debugging and on arches that do not |
@@ -113,6 +108,9 @@ | |||
113 | * the fast path and disables lockless freelists. | 108 | * the fast path and disables lockless freelists. |
114 | */ | 109 | */ |
115 | 110 | ||
111 | #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ | ||
112 | SLAB_TRACE | SLAB_DEBUG_FREE) | ||
113 | |||
116 | static inline int kmem_cache_debug(struct kmem_cache *s) | 114 | static inline int kmem_cache_debug(struct kmem_cache *s) |
117 | { | 115 | { |
118 | #ifdef CONFIG_SLUB_DEBUG | 116 | #ifdef CONFIG_SLUB_DEBUG |
@@ -177,10 +175,23 @@ static inline int kmem_cache_debug(struct kmem_cache *s) | |||
177 | #define __OBJECT_POISON 0x80000000UL /* Poison object */ | 175 | #define __OBJECT_POISON 0x80000000UL /* Poison object */ |
178 | #define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */ | 176 | #define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */ |
179 | 177 | ||
178 | static int kmem_size = sizeof(struct kmem_cache); | ||
179 | |||
180 | #ifdef CONFIG_SMP | 180 | #ifdef CONFIG_SMP |
181 | static struct notifier_block slab_notifier; | 181 | static struct notifier_block slab_notifier; |
182 | #endif | 182 | #endif |
183 | 183 | ||
184 | static enum { | ||
185 | DOWN, /* No slab functionality available */ | ||
186 | PARTIAL, /* Kmem_cache_node works */ | ||
187 | UP, /* Everything works but does not show up in sysfs */ | ||
188 | SYSFS /* Sysfs up */ | ||
189 | } slab_state = DOWN; | ||
190 | |||
191 | /* A list of all slab caches on the system */ | ||
192 | static DECLARE_RWSEM(slub_lock); | ||
193 | static LIST_HEAD(slab_caches); | ||
194 | |||
184 | /* | 195 | /* |
185 | * Tracking user of a slab. | 196 | * Tracking user of a slab. |
186 | */ | 197 | */ |
@@ -201,14 +212,17 @@ enum track_item { TRACK_ALLOC, TRACK_FREE }; | |||
201 | static int sysfs_slab_add(struct kmem_cache *); | 212 | static int sysfs_slab_add(struct kmem_cache *); |
202 | static int sysfs_slab_alias(struct kmem_cache *, const char *); | 213 | static int sysfs_slab_alias(struct kmem_cache *, const char *); |
203 | static void sysfs_slab_remove(struct kmem_cache *); | 214 | static void sysfs_slab_remove(struct kmem_cache *); |
204 | static void memcg_propagate_slab_attrs(struct kmem_cache *s); | 215 | |
205 | #else | 216 | #else |
206 | static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } | 217 | static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } |
207 | static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) | 218 | static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) |
208 | { return 0; } | 219 | { return 0; } |
209 | static inline void sysfs_slab_remove(struct kmem_cache *s) { } | 220 | static inline void sysfs_slab_remove(struct kmem_cache *s) |
221 | { | ||
222 | kfree(s->name); | ||
223 | kfree(s); | ||
224 | } | ||
210 | 225 | ||
211 | static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { } | ||
212 | #endif | 226 | #endif |
213 | 227 | ||
214 | static inline void stat(const struct kmem_cache *s, enum stat_item si) | 228 | static inline void stat(const struct kmem_cache *s, enum stat_item si) |
@@ -222,6 +236,11 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) | |||
222 | * Core slab cache functions | 236 | * Core slab cache functions |
223 | *******************************************************************/ | 237 | *******************************************************************/ |
224 | 238 | ||
239 | int slab_is_available(void) | ||
240 | { | ||
241 | return slab_state >= UP; | ||
242 | } | ||
243 | |||
225 | static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) | 244 | static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) |
226 | { | 245 | { |
227 | return s->node[node]; | 246 | return s->node[node]; |
@@ -250,11 +269,6 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object) | |||
250 | return *(void **)(object + s->offset); | 269 | return *(void **)(object + s->offset); |
251 | } | 270 | } |
252 | 271 | ||
253 | static void prefetch_freepointer(const struct kmem_cache *s, void *object) | ||
254 | { | ||
255 | prefetch(object + s->offset); | ||
256 | } | ||
257 | |||
258 | static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) | 272 | static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) |
259 | { | 273 | { |
260 | void *p; | 274 | void *p; |
@@ -291,7 +305,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s) | |||
291 | * and whatever may come after it. | 305 | * and whatever may come after it. |
292 | */ | 306 | */ |
293 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) | 307 | if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) |
294 | return s->object_size; | 308 | return s->objsize; |
295 | 309 | ||
296 | #endif | 310 | #endif |
297 | /* | 311 | /* |
@@ -352,10 +366,9 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page | |||
352 | const char *n) | 366 | const char *n) |
353 | { | 367 | { |
354 | VM_BUG_ON(!irqs_disabled()); | 368 | VM_BUG_ON(!irqs_disabled()); |
355 | #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ | 369 | #ifdef CONFIG_CMPXCHG_DOUBLE |
356 | defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) | ||
357 | if (s->flags & __CMPXCHG_DOUBLE) { | 370 | if (s->flags & __CMPXCHG_DOUBLE) { |
358 | if (cmpxchg_double(&page->freelist, &page->counters, | 371 | if (cmpxchg_double(&page->freelist, |
359 | freelist_old, counters_old, | 372 | freelist_old, counters_old, |
360 | freelist_new, counters_new)) | 373 | freelist_new, counters_new)) |
361 | return 1; | 374 | return 1; |
@@ -387,10 +400,9 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, | |||
387 | void *freelist_new, unsigned long counters_new, | 400 | void *freelist_new, unsigned long counters_new, |
388 | const char *n) | 401 | const char *n) |
389 | { | 402 | { |
390 | #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ | 403 | #ifdef CONFIG_CMPXCHG_DOUBLE |
391 | defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) | ||
392 | if (s->flags & __CMPXCHG_DOUBLE) { | 404 | if (s->flags & __CMPXCHG_DOUBLE) { |
393 | if (cmpxchg_double(&page->freelist, &page->counters, | 405 | if (cmpxchg_double(&page->freelist, |
394 | freelist_old, counters_old, | 406 | freelist_old, counters_old, |
395 | freelist_new, counters_new)) | 407 | freelist_new, counters_new)) |
396 | return 1; | 408 | return 1; |
@@ -455,8 +467,34 @@ static int disable_higher_order_debug; | |||
455 | */ | 467 | */ |
456 | static void print_section(char *text, u8 *addr, unsigned int length) | 468 | static void print_section(char *text, u8 *addr, unsigned int length) |
457 | { | 469 | { |
458 | print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, | 470 | int i, offset; |
459 | length, 1); | 471 | int newline = 1; |
472 | char ascii[17]; | ||
473 | |||
474 | ascii[16] = 0; | ||
475 | |||
476 | for (i = 0; i < length; i++) { | ||
477 | if (newline) { | ||
478 | printk(KERN_ERR "%8s 0x%p: ", text, addr + i); | ||
479 | newline = 0; | ||
480 | } | ||
481 | printk(KERN_CONT " %02x", addr[i]); | ||
482 | offset = i % 16; | ||
483 | ascii[offset] = isgraph(addr[i]) ? addr[i] : '.'; | ||
484 | if (offset == 15) { | ||
485 | printk(KERN_CONT " %s\n", ascii); | ||
486 | newline = 1; | ||
487 | } | ||
488 | } | ||
489 | if (!newline) { | ||
490 | i %= 16; | ||
491 | while (i < 16) { | ||
492 | printk(KERN_CONT " "); | ||
493 | ascii[i] = ' '; | ||
494 | i++; | ||
495 | } | ||
496 | printk(KERN_CONT " %s\n", ascii); | ||
497 | } | ||
460 | } | 498 | } |
461 | 499 | ||
462 | static struct track *get_track(struct kmem_cache *s, void *object, | 500 | static struct track *get_track(struct kmem_cache *s, void *object, |
@@ -558,11 +596,9 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...) | |||
558 | va_end(args); | 596 | va_end(args); |
559 | printk(KERN_ERR "========================================" | 597 | printk(KERN_ERR "========================================" |
560 | "=====================================\n"); | 598 | "=====================================\n"); |
561 | printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf); | 599 | printk(KERN_ERR "BUG %s: %s\n", s->name, buf); |
562 | printk(KERN_ERR "----------------------------------------" | 600 | printk(KERN_ERR "----------------------------------------" |
563 | "-------------------------------------\n\n"); | 601 | "-------------------------------------\n\n"); |
564 | |||
565 | add_taint(TAINT_BAD_PAGE); | ||
566 | } | 602 | } |
567 | 603 | ||
568 | static void slab_fix(struct kmem_cache *s, char *fmt, ...) | 604 | static void slab_fix(struct kmem_cache *s, char *fmt, ...) |
@@ -589,13 +625,13 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | |||
589 | p, p - addr, get_freepointer(s, p)); | 625 | p, p - addr, get_freepointer(s, p)); |
590 | 626 | ||
591 | if (p > addr + 16) | 627 | if (p > addr + 16) |
592 | print_section("Bytes b4 ", p - 16, 16); | 628 | print_section("Bytes b4", p - 16, 16); |
629 | |||
630 | print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE)); | ||
593 | 631 | ||
594 | print_section("Object ", p, min_t(unsigned long, s->object_size, | ||
595 | PAGE_SIZE)); | ||
596 | if (s->flags & SLAB_RED_ZONE) | 632 | if (s->flags & SLAB_RED_ZONE) |
597 | print_section("Redzone ", p + s->object_size, | 633 | print_section("Redzone", p + s->objsize, |
598 | s->inuse - s->object_size); | 634 | s->inuse - s->objsize); |
599 | 635 | ||
600 | if (s->offset) | 636 | if (s->offset) |
601 | off = s->offset + sizeof(void *); | 637 | off = s->offset + sizeof(void *); |
@@ -607,7 +643,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | |||
607 | 643 | ||
608 | if (off != s->size) | 644 | if (off != s->size) |
609 | /* Beginning of the filler is the free pointer */ | 645 | /* Beginning of the filler is the free pointer */ |
610 | print_section("Padding ", p + off, s->size - off); | 646 | print_section("Padding", p + off, s->size - off); |
611 | 647 | ||
612 | dump_stack(); | 648 | dump_stack(); |
613 | } | 649 | } |
@@ -619,7 +655,7 @@ static void object_err(struct kmem_cache *s, struct page *page, | |||
619 | print_trailer(s, page, object); | 655 | print_trailer(s, page, object); |
620 | } | 656 | } |
621 | 657 | ||
622 | static void slab_err(struct kmem_cache *s, struct page *page, const char *fmt, ...) | 658 | static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...) |
623 | { | 659 | { |
624 | va_list args; | 660 | va_list args; |
625 | char buf[100]; | 661 | char buf[100]; |
@@ -637,12 +673,55 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) | |||
637 | u8 *p = object; | 673 | u8 *p = object; |
638 | 674 | ||
639 | if (s->flags & __OBJECT_POISON) { | 675 | if (s->flags & __OBJECT_POISON) { |
640 | memset(p, POISON_FREE, s->object_size - 1); | 676 | memset(p, POISON_FREE, s->objsize - 1); |
641 | p[s->object_size - 1] = POISON_END; | 677 | p[s->objsize - 1] = POISON_END; |
642 | } | 678 | } |
643 | 679 | ||
644 | if (s->flags & SLAB_RED_ZONE) | 680 | if (s->flags & SLAB_RED_ZONE) |
645 | memset(p + s->object_size, val, s->inuse - s->object_size); | 681 | memset(p + s->objsize, val, s->inuse - s->objsize); |
682 | } | ||
683 | |||
684 | static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes) | ||
685 | { | ||
686 | while (bytes) { | ||
687 | if (*start != value) | ||
688 | return start; | ||
689 | start++; | ||
690 | bytes--; | ||
691 | } | ||
692 | return NULL; | ||
693 | } | ||
694 | |||
695 | static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes) | ||
696 | { | ||
697 | u64 value64; | ||
698 | unsigned int words, prefix; | ||
699 | |||
700 | if (bytes <= 16) | ||
701 | return check_bytes8(start, value, bytes); | ||
702 | |||
703 | value64 = value | value << 8 | value << 16 | value << 24; | ||
704 | value64 = (value64 & 0xffffffff) | value64 << 32; | ||
705 | prefix = 8 - ((unsigned long)start) % 8; | ||
706 | |||
707 | if (prefix) { | ||
708 | u8 *r = check_bytes8(start, value, prefix); | ||
709 | if (r) | ||
710 | return r; | ||
711 | start += prefix; | ||
712 | bytes -= prefix; | ||
713 | } | ||
714 | |||
715 | words = bytes / 8; | ||
716 | |||
717 | while (words) { | ||
718 | if (*(u64 *)start != value64) | ||
719 | return check_bytes8(start, value, 8); | ||
720 | start += 8; | ||
721 | words--; | ||
722 | } | ||
723 | |||
724 | return check_bytes8(start, value, bytes % 8); | ||
646 | } | 725 | } |
647 | 726 | ||
648 | static void restore_bytes(struct kmem_cache *s, char *message, u8 data, | 727 | static void restore_bytes(struct kmem_cache *s, char *message, u8 data, |
@@ -659,7 +738,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, | |||
659 | u8 *fault; | 738 | u8 *fault; |
660 | u8 *end; | 739 | u8 *end; |
661 | 740 | ||
662 | fault = memchr_inv(start, value, bytes); | 741 | fault = check_bytes(start, value, bytes); |
663 | if (!fault) | 742 | if (!fault) |
664 | return 1; | 743 | return 1; |
665 | 744 | ||
@@ -687,10 +766,10 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, | |||
687 | * Poisoning uses 0x6b (POISON_FREE) and the last byte is | 766 | * Poisoning uses 0x6b (POISON_FREE) and the last byte is |
688 | * 0xa5 (POISON_END) | 767 | * 0xa5 (POISON_END) |
689 | * | 768 | * |
690 | * object + s->object_size | 769 | * object + s->objsize |
691 | * Padding to reach word boundary. This is also used for Redzoning. | 770 | * Padding to reach word boundary. This is also used for Redzoning. |
692 | * Padding is extended by another word if Redzoning is enabled and | 771 | * Padding is extended by another word if Redzoning is enabled and |
693 | * object_size == inuse. | 772 | * objsize == inuse. |
694 | * | 773 | * |
695 | * We fill with 0xbb (RED_INACTIVE) for inactive objects and with | 774 | * We fill with 0xbb (RED_INACTIVE) for inactive objects and with |
696 | * 0xcc (RED_ACTIVE) for objects in use. | 775 | * 0xcc (RED_ACTIVE) for objects in use. |
@@ -709,7 +788,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, | |||
709 | * object + s->size | 788 | * object + s->size |
710 | * Nothing is used beyond s->size. | 789 | * Nothing is used beyond s->size. |
711 | * | 790 | * |
712 | * If slabcaches are merged then the object_size and inuse boundaries are mostly | 791 | * If slabcaches are merged then the objsize and inuse boundaries are mostly |
713 | * ignored. And therefore no slab options that rely on these boundaries | 792 | * ignored. And therefore no slab options that rely on these boundaries |
714 | * may be used with merged slabcaches. | 793 | * may be used with merged slabcaches. |
715 | */ | 794 | */ |
@@ -752,14 +831,14 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) | |||
752 | if (!remainder) | 831 | if (!remainder) |
753 | return 1; | 832 | return 1; |
754 | 833 | ||
755 | fault = memchr_inv(end - remainder, POISON_INUSE, remainder); | 834 | fault = check_bytes(end - remainder, POISON_INUSE, remainder); |
756 | if (!fault) | 835 | if (!fault) |
757 | return 1; | 836 | return 1; |
758 | while (end > fault && end[-1] == POISON_INUSE) | 837 | while (end > fault && end[-1] == POISON_INUSE) |
759 | end--; | 838 | end--; |
760 | 839 | ||
761 | slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); | 840 | slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); |
762 | print_section("Padding ", end - remainder, remainder); | 841 | print_section("Padding", end - remainder, remainder); |
763 | 842 | ||
764 | restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); | 843 | restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); |
765 | return 0; | 844 | return 0; |
@@ -769,25 +848,25 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
769 | void *object, u8 val) | 848 | void *object, u8 val) |
770 | { | 849 | { |
771 | u8 *p = object; | 850 | u8 *p = object; |
772 | u8 *endobject = object + s->object_size; | 851 | u8 *endobject = object + s->objsize; |
773 | 852 | ||
774 | if (s->flags & SLAB_RED_ZONE) { | 853 | if (s->flags & SLAB_RED_ZONE) { |
775 | if (!check_bytes_and_report(s, page, object, "Redzone", | 854 | if (!check_bytes_and_report(s, page, object, "Redzone", |
776 | endobject, val, s->inuse - s->object_size)) | 855 | endobject, val, s->inuse - s->objsize)) |
777 | return 0; | 856 | return 0; |
778 | } else { | 857 | } else { |
779 | if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) { | 858 | if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { |
780 | check_bytes_and_report(s, page, p, "Alignment padding", | 859 | check_bytes_and_report(s, page, p, "Alignment padding", |
781 | endobject, POISON_INUSE, s->inuse - s->object_size); | 860 | endobject, POISON_INUSE, s->inuse - s->objsize); |
782 | } | 861 | } |
783 | } | 862 | } |
784 | 863 | ||
785 | if (s->flags & SLAB_POISON) { | 864 | if (s->flags & SLAB_POISON) { |
786 | if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && | 865 | if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && |
787 | (!check_bytes_and_report(s, page, p, "Poison", p, | 866 | (!check_bytes_and_report(s, page, p, "Poison", p, |
788 | POISON_FREE, s->object_size - 1) || | 867 | POISON_FREE, s->objsize - 1) || |
789 | !check_bytes_and_report(s, page, p, "Poison", | 868 | !check_bytes_and_report(s, page, p, "Poison", |
790 | p + s->object_size - 1, POISON_END, 1))) | 869 | p + s->objsize - 1, POISON_END, 1))) |
791 | return 0; | 870 | return 0; |
792 | /* | 871 | /* |
793 | * check_pad_bytes cleans up on its own. | 872 | * check_pad_bytes cleans up on its own. |
@@ -908,7 +987,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, | |||
908 | page->freelist); | 987 | page->freelist); |
909 | 988 | ||
910 | if (!alloc) | 989 | if (!alloc) |
911 | print_section("Object ", (void *)object, s->object_size); | 990 | print_section("Object", (void *)object, s->objsize); |
912 | 991 | ||
913 | dump_stack(); | 992 | dump_stack(); |
914 | } | 993 | } |
@@ -924,14 +1003,14 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) | |||
924 | lockdep_trace_alloc(flags); | 1003 | lockdep_trace_alloc(flags); |
925 | might_sleep_if(flags & __GFP_WAIT); | 1004 | might_sleep_if(flags & __GFP_WAIT); |
926 | 1005 | ||
927 | return should_failslab(s->object_size, flags, s->flags); | 1006 | return should_failslab(s->objsize, flags, s->flags); |
928 | } | 1007 | } |
929 | 1008 | ||
930 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) | 1009 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) |
931 | { | 1010 | { |
932 | flags &= gfp_allowed_mask; | 1011 | flags &= gfp_allowed_mask; |
933 | kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); | 1012 | kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); |
934 | kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); | 1013 | kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); |
935 | } | 1014 | } |
936 | 1015 | ||
937 | static inline void slab_free_hook(struct kmem_cache *s, void *x) | 1016 | static inline void slab_free_hook(struct kmem_cache *s, void *x) |
@@ -948,13 +1027,13 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) | |||
948 | unsigned long flags; | 1027 | unsigned long flags; |
949 | 1028 | ||
950 | local_irq_save(flags); | 1029 | local_irq_save(flags); |
951 | kmemcheck_slab_free(s, x, s->object_size); | 1030 | kmemcheck_slab_free(s, x, s->objsize); |
952 | debug_check_no_locks_freed(x, s->object_size); | 1031 | debug_check_no_locks_freed(x, s->objsize); |
953 | local_irq_restore(flags); | 1032 | local_irq_restore(flags); |
954 | } | 1033 | } |
955 | #endif | 1034 | #endif |
956 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) | 1035 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) |
957 | debug_check_no_obj_freed(x, s->object_size); | 1036 | debug_check_no_obj_freed(x, s->objsize); |
958 | } | 1037 | } |
959 | 1038 | ||
960 | /* | 1039 | /* |
@@ -1064,13 +1143,13 @@ bad: | |||
1064 | return 0; | 1143 | return 0; |
1065 | } | 1144 | } |
1066 | 1145 | ||
1067 | static noinline struct kmem_cache_node *free_debug_processing( | 1146 | static noinline int free_debug_processing(struct kmem_cache *s, |
1068 | struct kmem_cache *s, struct page *page, void *object, | 1147 | struct page *page, void *object, unsigned long addr) |
1069 | unsigned long addr, unsigned long *flags) | ||
1070 | { | 1148 | { |
1071 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | 1149 | unsigned long flags; |
1150 | int rc = 0; | ||
1072 | 1151 | ||
1073 | spin_lock_irqsave(&n->list_lock, *flags); | 1152 | local_irq_save(flags); |
1074 | slab_lock(page); | 1153 | slab_lock(page); |
1075 | 1154 | ||
1076 | if (!check_slab(s, page)) | 1155 | if (!check_slab(s, page)) |
@@ -1089,11 +1168,11 @@ static noinline struct kmem_cache_node *free_debug_processing( | |||
1089 | if (!check_object(s, page, object, SLUB_RED_ACTIVE)) | 1168 | if (!check_object(s, page, object, SLUB_RED_ACTIVE)) |
1090 | goto out; | 1169 | goto out; |
1091 | 1170 | ||
1092 | if (unlikely(s != page->slab_cache)) { | 1171 | if (unlikely(s != page->slab)) { |
1093 | if (!PageSlab(page)) { | 1172 | if (!PageSlab(page)) { |
1094 | slab_err(s, page, "Attempt to free object(0x%p) " | 1173 | slab_err(s, page, "Attempt to free object(0x%p) " |
1095 | "outside of slab", object); | 1174 | "outside of slab", object); |
1096 | } else if (!page->slab_cache) { | 1175 | } else if (!page->slab) { |
1097 | printk(KERN_ERR | 1176 | printk(KERN_ERR |
1098 | "SLUB <none>: no slab for object 0x%p.\n", | 1177 | "SLUB <none>: no slab for object 0x%p.\n", |
1099 | object); | 1178 | object); |
@@ -1108,19 +1187,15 @@ static noinline struct kmem_cache_node *free_debug_processing( | |||
1108 | set_track(s, object, TRACK_FREE, addr); | 1187 | set_track(s, object, TRACK_FREE, addr); |
1109 | trace(s, page, object, 0); | 1188 | trace(s, page, object, 0); |
1110 | init_object(s, object, SLUB_RED_INACTIVE); | 1189 | init_object(s, object, SLUB_RED_INACTIVE); |
1190 | rc = 1; | ||
1111 | out: | 1191 | out: |
1112 | slab_unlock(page); | 1192 | slab_unlock(page); |
1113 | /* | 1193 | local_irq_restore(flags); |
1114 | * Keep node_lock to preserve integrity | 1194 | return rc; |
1115 | * until the object is actually freed | ||
1116 | */ | ||
1117 | return n; | ||
1118 | 1195 | ||
1119 | fail: | 1196 | fail: |
1120 | slab_unlock(page); | ||
1121 | spin_unlock_irqrestore(&n->list_lock, *flags); | ||
1122 | slab_fix(s, "Object at 0x%p not freed", object); | 1197 | slab_fix(s, "Object at 0x%p not freed", object); |
1123 | return NULL; | 1198 | goto out; |
1124 | } | 1199 | } |
1125 | 1200 | ||
1126 | static int __init setup_slub_debug(char *str) | 1201 | static int __init setup_slub_debug(char *str) |
@@ -1193,7 +1268,7 @@ out: | |||
1193 | 1268 | ||
1194 | __setup("slub_debug", setup_slub_debug); | 1269 | __setup("slub_debug", setup_slub_debug); |
1195 | 1270 | ||
1196 | static unsigned long kmem_cache_flags(unsigned long object_size, | 1271 | static unsigned long kmem_cache_flags(unsigned long objsize, |
1197 | unsigned long flags, const char *name, | 1272 | unsigned long flags, const char *name, |
1198 | void (*ctor)(void *)) | 1273 | void (*ctor)(void *)) |
1199 | { | 1274 | { |
@@ -1213,9 +1288,8 @@ static inline void setup_object_debug(struct kmem_cache *s, | |||
1213 | static inline int alloc_debug_processing(struct kmem_cache *s, | 1288 | static inline int alloc_debug_processing(struct kmem_cache *s, |
1214 | struct page *page, void *object, unsigned long addr) { return 0; } | 1289 | struct page *page, void *object, unsigned long addr) { return 0; } |
1215 | 1290 | ||
1216 | static inline struct kmem_cache_node *free_debug_processing( | 1291 | static inline int free_debug_processing(struct kmem_cache *s, |
1217 | struct kmem_cache *s, struct page *page, void *object, | 1292 | struct page *page, void *object, unsigned long addr) { return 0; } |
1218 | unsigned long addr, unsigned long *flags) { return NULL; } | ||
1219 | 1293 | ||
1220 | static inline int slab_pad_check(struct kmem_cache *s, struct page *page) | 1294 | static inline int slab_pad_check(struct kmem_cache *s, struct page *page) |
1221 | { return 1; } | 1295 | { return 1; } |
@@ -1224,7 +1298,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page, | |||
1224 | static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, | 1298 | static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, |
1225 | struct page *page) {} | 1299 | struct page *page) {} |
1226 | static inline void remove_full(struct kmem_cache *s, struct page *page) {} | 1300 | static inline void remove_full(struct kmem_cache *s, struct page *page) {} |
1227 | static inline unsigned long kmem_cache_flags(unsigned long object_size, | 1301 | static inline unsigned long kmem_cache_flags(unsigned long objsize, |
1228 | unsigned long flags, const char *name, | 1302 | unsigned long flags, const char *name, |
1229 | void (*ctor)(void *)) | 1303 | void (*ctor)(void *)) |
1230 | { | 1304 | { |
@@ -1301,7 +1375,13 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1301 | stat(s, ORDER_FALLBACK); | 1375 | stat(s, ORDER_FALLBACK); |
1302 | } | 1376 | } |
1303 | 1377 | ||
1304 | if (kmemcheck_enabled && page | 1378 | if (flags & __GFP_WAIT) |
1379 | local_irq_disable(); | ||
1380 | |||
1381 | if (!page) | ||
1382 | return NULL; | ||
1383 | |||
1384 | if (kmemcheck_enabled | ||
1305 | && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { | 1385 | && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { |
1306 | int pages = 1 << oo_order(oo); | 1386 | int pages = 1 << oo_order(oo); |
1307 | 1387 | ||
@@ -1317,11 +1397,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1317 | kmemcheck_mark_unallocated_pages(page, pages); | 1397 | kmemcheck_mark_unallocated_pages(page, pages); |
1318 | } | 1398 | } |
1319 | 1399 | ||
1320 | if (flags & __GFP_WAIT) | ||
1321 | local_irq_disable(); | ||
1322 | if (!page) | ||
1323 | return NULL; | ||
1324 | |||
1325 | page->objects = oo_objects(oo); | 1400 | page->objects = oo_objects(oo); |
1326 | mod_zone_page_state(page_zone(page), | 1401 | mod_zone_page_state(page_zone(page), |
1327 | (s->flags & SLAB_RECLAIM_ACCOUNT) ? | 1402 | (s->flags & SLAB_RECLAIM_ACCOUNT) ? |
@@ -1345,7 +1420,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1345 | void *start; | 1420 | void *start; |
1346 | void *last; | 1421 | void *last; |
1347 | void *p; | 1422 | void *p; |
1348 | int order; | ||
1349 | 1423 | ||
1350 | BUG_ON(flags & GFP_SLAB_BUG_MASK); | 1424 | BUG_ON(flags & GFP_SLAB_BUG_MASK); |
1351 | 1425 | ||
@@ -1354,18 +1428,14 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1354 | if (!page) | 1428 | if (!page) |
1355 | goto out; | 1429 | goto out; |
1356 | 1430 | ||
1357 | order = compound_order(page); | ||
1358 | inc_slabs_node(s, page_to_nid(page), page->objects); | 1431 | inc_slabs_node(s, page_to_nid(page), page->objects); |
1359 | memcg_bind_pages(s, order); | 1432 | page->slab = s; |
1360 | page->slab_cache = s; | 1433 | page->flags |= 1 << PG_slab; |
1361 | __SetPageSlab(page); | ||
1362 | if (page->pfmemalloc) | ||
1363 | SetPageSlabPfmemalloc(page); | ||
1364 | 1434 | ||
1365 | start = page_address(page); | 1435 | start = page_address(page); |
1366 | 1436 | ||
1367 | if (unlikely(s->flags & SLAB_POISON)) | 1437 | if (unlikely(s->flags & SLAB_POISON)) |
1368 | memset(start, POISON_INUSE, PAGE_SIZE << order); | 1438 | memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page)); |
1369 | 1439 | ||
1370 | last = start; | 1440 | last = start; |
1371 | for_each_object(p, s, start, page->objects) { | 1441 | for_each_object(p, s, start, page->objects) { |
@@ -1377,7 +1447,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1377 | set_freepointer(s, last, NULL); | 1447 | set_freepointer(s, last, NULL); |
1378 | 1448 | ||
1379 | page->freelist = start; | 1449 | page->freelist = start; |
1380 | page->inuse = page->objects; | 1450 | page->inuse = 0; |
1381 | page->frozen = 1; | 1451 | page->frozen = 1; |
1382 | out: | 1452 | out: |
1383 | return page; | 1453 | return page; |
@@ -1404,14 +1474,11 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1404 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, | 1474 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
1405 | -pages); | 1475 | -pages); |
1406 | 1476 | ||
1407 | __ClearPageSlabPfmemalloc(page); | ||
1408 | __ClearPageSlab(page); | 1477 | __ClearPageSlab(page); |
1409 | |||
1410 | memcg_release_pages(s, order); | ||
1411 | reset_page_mapcount(page); | 1478 | reset_page_mapcount(page); |
1412 | if (current->reclaim_state) | 1479 | if (current->reclaim_state) |
1413 | current->reclaim_state->reclaimed_slab += pages; | 1480 | current->reclaim_state->reclaimed_slab += pages; |
1414 | __free_memcg_kmem_pages(page, order); | 1481 | __free_pages(page, order); |
1415 | } | 1482 | } |
1416 | 1483 | ||
1417 | #define need_reserve_slab_rcu \ | 1484 | #define need_reserve_slab_rcu \ |
@@ -1426,7 +1493,7 @@ static void rcu_free_slab(struct rcu_head *h) | |||
1426 | else | 1493 | else |
1427 | page = container_of((struct list_head *)h, struct page, lru); | 1494 | page = container_of((struct list_head *)h, struct page, lru); |
1428 | 1495 | ||
1429 | __free_slab(page->slab_cache, page); | 1496 | __free_slab(page->slab, page); |
1430 | } | 1497 | } |
1431 | 1498 | ||
1432 | static void free_slab(struct kmem_cache *s, struct page *page) | 1499 | static void free_slab(struct kmem_cache *s, struct page *page) |
@@ -1467,7 +1534,7 @@ static inline void add_partial(struct kmem_cache_node *n, | |||
1467 | struct page *page, int tail) | 1534 | struct page *page, int tail) |
1468 | { | 1535 | { |
1469 | n->nr_partial++; | 1536 | n->nr_partial++; |
1470 | if (tail == DEACTIVATE_TO_TAIL) | 1537 | if (tail) |
1471 | list_add_tail(&page->lru, &n->partial); | 1538 | list_add_tail(&page->lru, &n->partial); |
1472 | else | 1539 | else |
1473 | list_add(&page->lru, &n->partial); | 1540 | list_add(&page->lru, &n->partial); |
@@ -1484,16 +1551,13 @@ static inline void remove_partial(struct kmem_cache_node *n, | |||
1484 | } | 1551 | } |
1485 | 1552 | ||
1486 | /* | 1553 | /* |
1487 | * Remove slab from the partial list, freeze it and | 1554 | * Lock slab, remove from the partial list and put the object into the |
1488 | * return the pointer to the freelist. | 1555 | * per cpu freelist. |
1489 | * | 1556 | * |
1490 | * Returns a list of objects or NULL if it fails. | 1557 | * Must hold list_lock. |
1491 | * | ||
1492 | * Must hold list_lock since we modify the partial list. | ||
1493 | */ | 1558 | */ |
1494 | static inline void *acquire_slab(struct kmem_cache *s, | 1559 | static inline int acquire_slab(struct kmem_cache *s, |
1495 | struct kmem_cache_node *n, struct page *page, | 1560 | struct kmem_cache_node *n, struct page *page) |
1496 | int mode) | ||
1497 | { | 1561 | { |
1498 | void *freelist; | 1562 | void *freelist; |
1499 | unsigned long counters; | 1563 | unsigned long counters; |
@@ -1504,41 +1568,47 @@ static inline void *acquire_slab(struct kmem_cache *s, | |||
1504 | * The old freelist is the list of objects for the | 1568 | * The old freelist is the list of objects for the |
1505 | * per cpu allocation list. | 1569 | * per cpu allocation list. |
1506 | */ | 1570 | */ |
1507 | freelist = page->freelist; | 1571 | do { |
1508 | counters = page->counters; | 1572 | freelist = page->freelist; |
1509 | new.counters = counters; | 1573 | counters = page->counters; |
1510 | if (mode) { | 1574 | new.counters = counters; |
1511 | new.inuse = page->objects; | 1575 | new.inuse = page->objects; |
1512 | new.freelist = NULL; | ||
1513 | } else { | ||
1514 | new.freelist = freelist; | ||
1515 | } | ||
1516 | 1576 | ||
1517 | VM_BUG_ON(new.frozen); | 1577 | VM_BUG_ON(new.frozen); |
1518 | new.frozen = 1; | 1578 | new.frozen = 1; |
1519 | 1579 | ||
1520 | if (!__cmpxchg_double_slab(s, page, | 1580 | } while (!__cmpxchg_double_slab(s, page, |
1521 | freelist, counters, | 1581 | freelist, counters, |
1522 | new.freelist, new.counters, | 1582 | NULL, new.counters, |
1523 | "acquire_slab")) | 1583 | "lock and freeze")); |
1524 | return NULL; | ||
1525 | 1584 | ||
1526 | remove_partial(n, page); | 1585 | remove_partial(n, page); |
1527 | WARN_ON(!freelist); | ||
1528 | return freelist; | ||
1529 | } | ||
1530 | 1586 | ||
1531 | static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); | 1587 | if (freelist) { |
1532 | static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); | 1588 | /* Populate the per cpu freelist */ |
1589 | this_cpu_write(s->cpu_slab->freelist, freelist); | ||
1590 | this_cpu_write(s->cpu_slab->page, page); | ||
1591 | this_cpu_write(s->cpu_slab->node, page_to_nid(page)); | ||
1592 | return 1; | ||
1593 | } else { | ||
1594 | /* | ||
1595 | * Slab page came from the wrong list. No object to allocate | ||
1596 | * from. Put it onto the correct list and continue partial | ||
1597 | * scan. | ||
1598 | */ | ||
1599 | printk(KERN_ERR "SLUB: %s : Page without available objects on" | ||
1600 | " partial list\n", s->name); | ||
1601 | return 0; | ||
1602 | } | ||
1603 | } | ||
1533 | 1604 | ||
1534 | /* | 1605 | /* |
1535 | * Try to allocate a partial slab from a specific node. | 1606 | * Try to allocate a partial slab from a specific node. |
1536 | */ | 1607 | */ |
1537 | static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, | 1608 | static struct page *get_partial_node(struct kmem_cache *s, |
1538 | struct kmem_cache_cpu *c, gfp_t flags) | 1609 | struct kmem_cache_node *n) |
1539 | { | 1610 | { |
1540 | struct page *page, *page2; | 1611 | struct page *page; |
1541 | void *object = NULL; | ||
1542 | 1612 | ||
1543 | /* | 1613 | /* |
1544 | * Racy check. If we mistakenly see no partial slabs then we | 1614 | * Racy check. If we mistakenly see no partial slabs then we |
@@ -1550,47 +1620,26 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, | |||
1550 | return NULL; | 1620 | return NULL; |
1551 | 1621 | ||
1552 | spin_lock(&n->list_lock); | 1622 | spin_lock(&n->list_lock); |
1553 | list_for_each_entry_safe(page, page2, &n->partial, lru) { | 1623 | list_for_each_entry(page, &n->partial, lru) |
1554 | void *t; | 1624 | if (acquire_slab(s, n, page)) |
1555 | int available; | 1625 | goto out; |
1556 | 1626 | page = NULL; | |
1557 | if (!pfmemalloc_match(page, flags)) | 1627 | out: |
1558 | continue; | ||
1559 | |||
1560 | t = acquire_slab(s, n, page, object == NULL); | ||
1561 | if (!t) | ||
1562 | break; | ||
1563 | |||
1564 | if (!object) { | ||
1565 | c->page = page; | ||
1566 | stat(s, ALLOC_FROM_PARTIAL); | ||
1567 | object = t; | ||
1568 | available = page->objects - page->inuse; | ||
1569 | } else { | ||
1570 | available = put_cpu_partial(s, page, 0); | ||
1571 | stat(s, CPU_PARTIAL_NODE); | ||
1572 | } | ||
1573 | if (kmem_cache_debug(s) || available > s->cpu_partial / 2) | ||
1574 | break; | ||
1575 | |||
1576 | } | ||
1577 | spin_unlock(&n->list_lock); | 1628 | spin_unlock(&n->list_lock); |
1578 | return object; | 1629 | return page; |
1579 | } | 1630 | } |
1580 | 1631 | ||
1581 | /* | 1632 | /* |
1582 | * Get a page from somewhere. Search in increasing NUMA distances. | 1633 | * Get a page from somewhere. Search in increasing NUMA distances. |
1583 | */ | 1634 | */ |
1584 | static void *get_any_partial(struct kmem_cache *s, gfp_t flags, | 1635 | static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) |
1585 | struct kmem_cache_cpu *c) | ||
1586 | { | 1636 | { |
1587 | #ifdef CONFIG_NUMA | 1637 | #ifdef CONFIG_NUMA |
1588 | struct zonelist *zonelist; | 1638 | struct zonelist *zonelist; |
1589 | struct zoneref *z; | 1639 | struct zoneref *z; |
1590 | struct zone *zone; | 1640 | struct zone *zone; |
1591 | enum zone_type high_zoneidx = gfp_zone(flags); | 1641 | enum zone_type high_zoneidx = gfp_zone(flags); |
1592 | void *object; | 1642 | struct page *page; |
1593 | unsigned int cpuset_mems_cookie; | ||
1594 | 1643 | ||
1595 | /* | 1644 | /* |
1596 | * The defrag ratio allows a configuration of the tradeoffs between | 1645 | * The defrag ratio allows a configuration of the tradeoffs between |
@@ -1614,32 +1663,23 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, | |||
1614 | get_cycles() % 1024 > s->remote_node_defrag_ratio) | 1663 | get_cycles() % 1024 > s->remote_node_defrag_ratio) |
1615 | return NULL; | 1664 | return NULL; |
1616 | 1665 | ||
1617 | do { | 1666 | get_mems_allowed(); |
1618 | cpuset_mems_cookie = get_mems_allowed(); | 1667 | zonelist = node_zonelist(slab_node(current->mempolicy), flags); |
1619 | zonelist = node_zonelist(slab_node(), flags); | 1668 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { |
1620 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { | 1669 | struct kmem_cache_node *n; |
1621 | struct kmem_cache_node *n; | 1670 | |
1622 | 1671 | n = get_node(s, zone_to_nid(zone)); | |
1623 | n = get_node(s, zone_to_nid(zone)); | 1672 | |
1624 | 1673 | if (n && cpuset_zone_allowed_hardwall(zone, flags) && | |
1625 | if (n && cpuset_zone_allowed_hardwall(zone, flags) && | 1674 | n->nr_partial > s->min_partial) { |
1626 | n->nr_partial > s->min_partial) { | 1675 | page = get_partial_node(s, n); |
1627 | object = get_partial_node(s, n, c, flags); | 1676 | if (page) { |
1628 | if (object) { | 1677 | put_mems_allowed(); |
1629 | /* | 1678 | return page; |
1630 | * Return the object even if | ||
1631 | * put_mems_allowed indicated that | ||
1632 | * the cpuset mems_allowed was | ||
1633 | * updated in parallel. It's a | ||
1634 | * harmless race between the alloc | ||
1635 | * and the cpuset update. | ||
1636 | */ | ||
1637 | put_mems_allowed(cpuset_mems_cookie); | ||
1638 | return object; | ||
1639 | } | ||
1640 | } | 1679 | } |
1641 | } | 1680 | } |
1642 | } while (!put_mems_allowed(cpuset_mems_cookie)); | 1681 | } |
1682 | put_mems_allowed(); | ||
1643 | #endif | 1683 | #endif |
1644 | return NULL; | 1684 | return NULL; |
1645 | } | 1685 | } |
@@ -1647,17 +1687,16 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, | |||
1647 | /* | 1687 | /* |
1648 | * Get a partial page, lock it and return it. | 1688 | * Get a partial page, lock it and return it. |
1649 | */ | 1689 | */ |
1650 | static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, | 1690 | static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) |
1651 | struct kmem_cache_cpu *c) | ||
1652 | { | 1691 | { |
1653 | void *object; | 1692 | struct page *page; |
1654 | int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; | 1693 | int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; |
1655 | 1694 | ||
1656 | object = get_partial_node(s, get_node(s, searchnode), c, flags); | 1695 | page = get_partial_node(s, get_node(s, searchnode)); |
1657 | if (object || node != NUMA_NO_NODE) | 1696 | if (page || node != NUMA_NO_NODE) |
1658 | return object; | 1697 | return page; |
1659 | 1698 | ||
1660 | return get_any_partial(s, flags, c); | 1699 | return get_any_partial(s, flags); |
1661 | } | 1700 | } |
1662 | 1701 | ||
1663 | #ifdef CONFIG_PREEMPT | 1702 | #ifdef CONFIG_PREEMPT |
@@ -1719,33 +1758,43 @@ static inline void note_cmpxchg_failure(const char *n, | |||
1719 | stat(s, CMPXCHG_DOUBLE_CPU_FAIL); | 1758 | stat(s, CMPXCHG_DOUBLE_CPU_FAIL); |
1720 | } | 1759 | } |
1721 | 1760 | ||
1722 | static void init_kmem_cache_cpus(struct kmem_cache *s) | 1761 | void init_kmem_cache_cpus(struct kmem_cache *s) |
1723 | { | 1762 | { |
1724 | int cpu; | 1763 | int cpu; |
1725 | 1764 | ||
1726 | for_each_possible_cpu(cpu) | 1765 | for_each_possible_cpu(cpu) |
1727 | per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); | 1766 | per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); |
1728 | } | 1767 | } |
1768 | /* | ||
1769 | * Remove the cpu slab | ||
1770 | */ | ||
1729 | 1771 | ||
1730 | /* | 1772 | /* |
1731 | * Remove the cpu slab | 1773 | * Remove the cpu slab |
1732 | */ | 1774 | */ |
1733 | static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist) | 1775 | static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
1734 | { | 1776 | { |
1735 | enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; | 1777 | enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; |
1778 | struct page *page = c->page; | ||
1736 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | 1779 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); |
1737 | int lock = 0; | 1780 | int lock = 0; |
1738 | enum slab_modes l = M_NONE, m = M_NONE; | 1781 | enum slab_modes l = M_NONE, m = M_NONE; |
1782 | void *freelist; | ||
1739 | void *nextfree; | 1783 | void *nextfree; |
1740 | int tail = DEACTIVATE_TO_HEAD; | 1784 | int tail = 0; |
1741 | struct page new; | 1785 | struct page new; |
1742 | struct page old; | 1786 | struct page old; |
1743 | 1787 | ||
1744 | if (page->freelist) { | 1788 | if (page->freelist) { |
1745 | stat(s, DEACTIVATE_REMOTE_FREES); | 1789 | stat(s, DEACTIVATE_REMOTE_FREES); |
1746 | tail = DEACTIVATE_TO_TAIL; | 1790 | tail = 1; |
1747 | } | 1791 | } |
1748 | 1792 | ||
1793 | c->tid = next_tid(c->tid); | ||
1794 | c->page = NULL; | ||
1795 | freelist = c->freelist; | ||
1796 | c->freelist = NULL; | ||
1797 | |||
1749 | /* | 1798 | /* |
1750 | * Stage one: Free all available per cpu objects back | 1799 | * Stage one: Free all available per cpu objects back |
1751 | * to the page freelist while it is still frozen. Leave the | 1800 | * to the page freelist while it is still frozen. Leave the |
@@ -1844,7 +1893,7 @@ redo: | |||
1844 | if (m == M_PARTIAL) { | 1893 | if (m == M_PARTIAL) { |
1845 | 1894 | ||
1846 | add_partial(n, page, tail); | 1895 | add_partial(n, page, tail); |
1847 | stat(s, tail); | 1896 | stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); |
1848 | 1897 | ||
1849 | } else if (m == M_FULL) { | 1898 | } else if (m == M_FULL) { |
1850 | 1899 | ||
@@ -1871,130 +1920,10 @@ redo: | |||
1871 | } | 1920 | } |
1872 | } | 1921 | } |
1873 | 1922 | ||
1874 | /* | ||
1875 | * Unfreeze all the cpu partial slabs. | ||
1876 | * | ||
1877 | * This function must be called with interrupts disabled | ||
1878 | * for the cpu using c (or some other guarantee must be there | ||
1879 | * to guarantee no concurrent accesses). | ||
1880 | */ | ||
1881 | static void unfreeze_partials(struct kmem_cache *s, | ||
1882 | struct kmem_cache_cpu *c) | ||
1883 | { | ||
1884 | struct kmem_cache_node *n = NULL, *n2 = NULL; | ||
1885 | struct page *page, *discard_page = NULL; | ||
1886 | |||
1887 | while ((page = c->partial)) { | ||
1888 | struct page new; | ||
1889 | struct page old; | ||
1890 | |||
1891 | c->partial = page->next; | ||
1892 | |||
1893 | n2 = get_node(s, page_to_nid(page)); | ||
1894 | if (n != n2) { | ||
1895 | if (n) | ||
1896 | spin_unlock(&n->list_lock); | ||
1897 | |||
1898 | n = n2; | ||
1899 | spin_lock(&n->list_lock); | ||
1900 | } | ||
1901 | |||
1902 | do { | ||
1903 | |||
1904 | old.freelist = page->freelist; | ||
1905 | old.counters = page->counters; | ||
1906 | VM_BUG_ON(!old.frozen); | ||
1907 | |||
1908 | new.counters = old.counters; | ||
1909 | new.freelist = old.freelist; | ||
1910 | |||
1911 | new.frozen = 0; | ||
1912 | |||
1913 | } while (!__cmpxchg_double_slab(s, page, | ||
1914 | old.freelist, old.counters, | ||
1915 | new.freelist, new.counters, | ||
1916 | "unfreezing slab")); | ||
1917 | |||
1918 | if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) { | ||
1919 | page->next = discard_page; | ||
1920 | discard_page = page; | ||
1921 | } else { | ||
1922 | add_partial(n, page, DEACTIVATE_TO_TAIL); | ||
1923 | stat(s, FREE_ADD_PARTIAL); | ||
1924 | } | ||
1925 | } | ||
1926 | |||
1927 | if (n) | ||
1928 | spin_unlock(&n->list_lock); | ||
1929 | |||
1930 | while (discard_page) { | ||
1931 | page = discard_page; | ||
1932 | discard_page = discard_page->next; | ||
1933 | |||
1934 | stat(s, DEACTIVATE_EMPTY); | ||
1935 | discard_slab(s, page); | ||
1936 | stat(s, FREE_SLAB); | ||
1937 | } | ||
1938 | } | ||
1939 | |||
1940 | /* | ||
1941 | * Put a page that was just frozen (in __slab_free) into a partial page | ||
1942 | * slot if available. This is done without interrupts disabled and without | ||
1943 | * preemption disabled. The cmpxchg is racy and may put the partial page | ||
1944 | * onto a random cpus partial slot. | ||
1945 | * | ||
1946 | * If we did not find a slot then simply move all the partials to the | ||
1947 | * per node partial list. | ||
1948 | */ | ||
1949 | static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) | ||
1950 | { | ||
1951 | struct page *oldpage; | ||
1952 | int pages; | ||
1953 | int pobjects; | ||
1954 | |||
1955 | do { | ||
1956 | pages = 0; | ||
1957 | pobjects = 0; | ||
1958 | oldpage = this_cpu_read(s->cpu_slab->partial); | ||
1959 | |||
1960 | if (oldpage) { | ||
1961 | pobjects = oldpage->pobjects; | ||
1962 | pages = oldpage->pages; | ||
1963 | if (drain && pobjects > s->cpu_partial) { | ||
1964 | unsigned long flags; | ||
1965 | /* | ||
1966 | * partial array is full. Move the existing | ||
1967 | * set to the per node partial list. | ||
1968 | */ | ||
1969 | local_irq_save(flags); | ||
1970 | unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); | ||
1971 | local_irq_restore(flags); | ||
1972 | oldpage = NULL; | ||
1973 | pobjects = 0; | ||
1974 | pages = 0; | ||
1975 | stat(s, CPU_PARTIAL_DRAIN); | ||
1976 | } | ||
1977 | } | ||
1978 | |||
1979 | pages++; | ||
1980 | pobjects += page->objects - page->inuse; | ||
1981 | |||
1982 | page->pages = pages; | ||
1983 | page->pobjects = pobjects; | ||
1984 | page->next = oldpage; | ||
1985 | |||
1986 | } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); | ||
1987 | return pobjects; | ||
1988 | } | ||
1989 | |||
1990 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | 1923 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
1991 | { | 1924 | { |
1992 | stat(s, CPUSLAB_FLUSH); | 1925 | stat(s, CPUSLAB_FLUSH); |
1993 | deactivate_slab(s, c->page, c->freelist); | 1926 | deactivate_slab(s, c); |
1994 | |||
1995 | c->tid = next_tid(c->tid); | ||
1996 | c->page = NULL; | ||
1997 | c->freelist = NULL; | ||
1998 | } | 1927 | } |
1999 | 1928 | ||
2000 | /* | 1929 | /* |
@@ -2006,12 +1935,8 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) | |||
2006 | { | 1935 | { |
2007 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); | 1936 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); |
2008 | 1937 | ||
2009 | if (likely(c)) { | 1938 | if (likely(c && c->page)) |
2010 | if (c->page) | 1939 | flush_slab(s, c); |
2011 | flush_slab(s, c); | ||
2012 | |||
2013 | unfreeze_partials(s, c); | ||
2014 | } | ||
2015 | } | 1940 | } |
2016 | 1941 | ||
2017 | static void flush_cpu_slab(void *d) | 1942 | static void flush_cpu_slab(void *d) |
@@ -2021,27 +1946,19 @@ static void flush_cpu_slab(void *d) | |||
2021 | __flush_cpu_slab(s, smp_processor_id()); | 1946 | __flush_cpu_slab(s, smp_processor_id()); |
2022 | } | 1947 | } |
2023 | 1948 | ||
2024 | static bool has_cpu_slab(int cpu, void *info) | ||
2025 | { | ||
2026 | struct kmem_cache *s = info; | ||
2027 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); | ||
2028 | |||
2029 | return c->page || c->partial; | ||
2030 | } | ||
2031 | |||
2032 | static void flush_all(struct kmem_cache *s) | 1949 | static void flush_all(struct kmem_cache *s) |
2033 | { | 1950 | { |
2034 | on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC); | 1951 | on_each_cpu(flush_cpu_slab, s, 1); |
2035 | } | 1952 | } |
2036 | 1953 | ||
2037 | /* | 1954 | /* |
2038 | * Check if the objects in a per cpu structure fit numa | 1955 | * Check if the objects in a per cpu structure fit numa |
2039 | * locality expectations. | 1956 | * locality expectations. |
2040 | */ | 1957 | */ |
2041 | static inline int node_match(struct page *page, int node) | 1958 | static inline int node_match(struct kmem_cache_cpu *c, int node) |
2042 | { | 1959 | { |
2043 | #ifdef CONFIG_NUMA | 1960 | #ifdef CONFIG_NUMA |
2044 | if (node != NUMA_NO_NODE && page_to_nid(page) != node) | 1961 | if (node != NUMA_NO_NODE && c->node != node) |
2045 | return 0; | 1962 | return 0; |
2046 | #endif | 1963 | #endif |
2047 | return 1; | 1964 | return 1; |
@@ -2084,10 +2001,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) | |||
2084 | "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", | 2001 | "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", |
2085 | nid, gfpflags); | 2002 | nid, gfpflags); |
2086 | printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " | 2003 | printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " |
2087 | "default order: %d, min order: %d\n", s->name, s->object_size, | 2004 | "default order: %d, min order: %d\n", s->name, s->objsize, |
2088 | s->size, oo_order(s->oo), oo_order(s->min)); | 2005 | s->size, oo_order(s->oo), oo_order(s->min)); |
2089 | 2006 | ||
2090 | if (oo_order(s->min) > get_order(s->object_size)) | 2007 | if (oo_order(s->min) > get_order(s->objsize)) |
2091 | printk(KERN_WARNING " %s debugging increased min order, use " | 2008 | printk(KERN_WARNING " %s debugging increased min order, use " |
2092 | "slub_debug=O to disable.\n", s->name); | 2009 | "slub_debug=O to disable.\n", s->name); |
2093 | 2010 | ||
@@ -2110,86 +2027,12 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) | |||
2110 | } | 2027 | } |
2111 | } | 2028 | } |
2112 | 2029 | ||
2113 | static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, | ||
2114 | int node, struct kmem_cache_cpu **pc) | ||
2115 | { | ||
2116 | void *freelist; | ||
2117 | struct kmem_cache_cpu *c = *pc; | ||
2118 | struct page *page; | ||
2119 | |||
2120 | freelist = get_partial(s, flags, node, c); | ||
2121 | |||
2122 | if (freelist) | ||
2123 | return freelist; | ||
2124 | |||
2125 | page = new_slab(s, flags, node); | ||
2126 | if (page) { | ||
2127 | c = __this_cpu_ptr(s->cpu_slab); | ||
2128 | if (c->page) | ||
2129 | flush_slab(s, c); | ||
2130 | |||
2131 | /* | ||
2132 | * No other reference to the page yet so we can | ||
2133 | * muck around with it freely without cmpxchg | ||
2134 | */ | ||
2135 | freelist = page->freelist; | ||
2136 | page->freelist = NULL; | ||
2137 | |||
2138 | stat(s, ALLOC_SLAB); | ||
2139 | c->page = page; | ||
2140 | *pc = c; | ||
2141 | } else | ||
2142 | freelist = NULL; | ||
2143 | |||
2144 | return freelist; | ||
2145 | } | ||
2146 | |||
2147 | static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags) | ||
2148 | { | ||
2149 | if (unlikely(PageSlabPfmemalloc(page))) | ||
2150 | return gfp_pfmemalloc_allowed(gfpflags); | ||
2151 | |||
2152 | return true; | ||
2153 | } | ||
2154 | |||
2155 | /* | ||
2156 | * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist | ||
2157 | * or deactivate the page. | ||
2158 | * | ||
2159 | * The page is still frozen if the return value is not NULL. | ||
2160 | * | ||
2161 | * If this function returns NULL then the page has been unfrozen. | ||
2162 | * | ||
2163 | * This function must be called with interrupt disabled. | ||
2164 | */ | ||
2165 | static inline void *get_freelist(struct kmem_cache *s, struct page *page) | ||
2166 | { | ||
2167 | struct page new; | ||
2168 | unsigned long counters; | ||
2169 | void *freelist; | ||
2170 | |||
2171 | do { | ||
2172 | freelist = page->freelist; | ||
2173 | counters = page->counters; | ||
2174 | |||
2175 | new.counters = counters; | ||
2176 | VM_BUG_ON(!new.frozen); | ||
2177 | |||
2178 | new.inuse = page->objects; | ||
2179 | new.frozen = freelist != NULL; | ||
2180 | |||
2181 | } while (!__cmpxchg_double_slab(s, page, | ||
2182 | freelist, counters, | ||
2183 | NULL, new.counters, | ||
2184 | "get_freelist")); | ||
2185 | |||
2186 | return freelist; | ||
2187 | } | ||
2188 | |||
2189 | /* | 2030 | /* |
2190 | * Slow path. The lockless freelist is empty or we need to perform | 2031 | * Slow path. The lockless freelist is empty or we need to perform |
2191 | * debugging duties. | 2032 | * debugging duties. |
2192 | * | 2033 | * |
2034 | * Interrupts are disabled. | ||
2035 | * | ||
2193 | * Processing is still very fast if new objects have been freed to the | 2036 | * Processing is still very fast if new objects have been freed to the |
2194 | * regular freelist. In that case we simply take over the regular freelist | 2037 | * regular freelist. In that case we simply take over the regular freelist |
2195 | * as the lockless freelist and zap the regular freelist. | 2038 | * as the lockless freelist and zap the regular freelist. |
@@ -2205,9 +2048,11 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) | |||
2205 | static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | 2048 | static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, |
2206 | unsigned long addr, struct kmem_cache_cpu *c) | 2049 | unsigned long addr, struct kmem_cache_cpu *c) |
2207 | { | 2050 | { |
2208 | void *freelist; | 2051 | void **object; |
2209 | struct page *page; | 2052 | struct page *page; |
2210 | unsigned long flags; | 2053 | unsigned long flags; |
2054 | struct page new; | ||
2055 | unsigned long counters; | ||
2211 | 2056 | ||
2212 | local_irq_save(flags); | 2057 | local_irq_save(flags); |
2213 | #ifdef CONFIG_PREEMPT | 2058 | #ifdef CONFIG_PREEMPT |
@@ -2219,41 +2064,51 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |||
2219 | c = this_cpu_ptr(s->cpu_slab); | 2064 | c = this_cpu_ptr(s->cpu_slab); |
2220 | #endif | 2065 | #endif |
2221 | 2066 | ||
2067 | /* We handle __GFP_ZERO in the caller */ | ||
2068 | gfpflags &= ~__GFP_ZERO; | ||
2069 | |||
2222 | page = c->page; | 2070 | page = c->page; |
2223 | if (!page) | 2071 | if (!page) |
2224 | goto new_slab; | 2072 | goto new_slab; |
2225 | redo: | ||
2226 | 2073 | ||
2227 | if (unlikely(!node_match(page, node))) { | 2074 | if (unlikely(!node_match(c, node))) { |
2228 | stat(s, ALLOC_NODE_MISMATCH); | 2075 | stat(s, ALLOC_NODE_MISMATCH); |
2229 | deactivate_slab(s, page, c->freelist); | 2076 | deactivate_slab(s, c); |
2230 | c->page = NULL; | ||
2231 | c->freelist = NULL; | ||
2232 | goto new_slab; | ||
2233 | } | ||
2234 | |||
2235 | /* | ||
2236 | * By rights, we should be searching for a slab page that was | ||
2237 | * PFMEMALLOC but right now, we are losing the pfmemalloc | ||
2238 | * information when the page leaves the per-cpu allocator | ||
2239 | */ | ||
2240 | if (unlikely(!pfmemalloc_match(page, gfpflags))) { | ||
2241 | deactivate_slab(s, page, c->freelist); | ||
2242 | c->page = NULL; | ||
2243 | c->freelist = NULL; | ||
2244 | goto new_slab; | 2077 | goto new_slab; |
2245 | } | 2078 | } |
2246 | 2079 | ||
2247 | /* must check again c->freelist in case of cpu migration or IRQ */ | 2080 | /* must check again c->freelist in case of cpu migration or IRQ */ |
2248 | freelist = c->freelist; | 2081 | object = c->freelist; |
2249 | if (freelist) | 2082 | if (object) |
2250 | goto load_freelist; | 2083 | goto load_freelist; |
2251 | 2084 | ||
2252 | stat(s, ALLOC_SLOWPATH); | 2085 | stat(s, ALLOC_SLOWPATH); |
2253 | 2086 | ||
2254 | freelist = get_freelist(s, page); | 2087 | do { |
2088 | object = page->freelist; | ||
2089 | counters = page->counters; | ||
2090 | new.counters = counters; | ||
2091 | VM_BUG_ON(!new.frozen); | ||
2092 | |||
2093 | /* | ||
2094 | * If there is no object left then we use this loop to | ||
2095 | * deactivate the slab which is simple since no objects | ||
2096 | * are left in the slab and therefore we do not need to | ||
2097 | * put the page back onto the partial list. | ||
2098 | * | ||
2099 | * If there are objects left then we retrieve them | ||
2100 | * and use them to refill the per cpu queue. | ||
2101 | */ | ||
2102 | |||
2103 | new.inuse = page->objects; | ||
2104 | new.frozen = object != NULL; | ||
2105 | |||
2106 | } while (!__cmpxchg_double_slab(s, page, | ||
2107 | object, counters, | ||
2108 | NULL, new.counters, | ||
2109 | "__slab_alloc")); | ||
2255 | 2110 | ||
2256 | if (!freelist) { | 2111 | if (unlikely(!object)) { |
2257 | c->page = NULL; | 2112 | c->page = NULL; |
2258 | stat(s, DEACTIVATE_BYPASS); | 2113 | stat(s, DEACTIVATE_BYPASS); |
2259 | goto new_slab; | 2114 | goto new_slab; |
@@ -2262,50 +2117,61 @@ redo: | |||
2262 | stat(s, ALLOC_REFILL); | 2117 | stat(s, ALLOC_REFILL); |
2263 | 2118 | ||
2264 | load_freelist: | 2119 | load_freelist: |
2265 | /* | 2120 | VM_BUG_ON(!page->frozen); |
2266 | * freelist is pointing to the list of objects to be used. | 2121 | c->freelist = get_freepointer(s, object); |
2267 | * page is pointing to the page from which the objects are obtained. | ||
2268 | * That page must be frozen for per cpu allocations to work. | ||
2269 | */ | ||
2270 | VM_BUG_ON(!c->page->frozen); | ||
2271 | c->freelist = get_freepointer(s, freelist); | ||
2272 | c->tid = next_tid(c->tid); | 2122 | c->tid = next_tid(c->tid); |
2273 | local_irq_restore(flags); | 2123 | local_irq_restore(flags); |
2274 | return freelist; | 2124 | return object; |
2275 | 2125 | ||
2276 | new_slab: | 2126 | new_slab: |
2127 | page = get_partial(s, gfpflags, node); | ||
2128 | if (page) { | ||
2129 | stat(s, ALLOC_FROM_PARTIAL); | ||
2130 | object = c->freelist; | ||
2277 | 2131 | ||
2278 | if (c->partial) { | 2132 | if (kmem_cache_debug(s)) |
2279 | page = c->page = c->partial; | 2133 | goto debug; |
2280 | c->partial = page->next; | 2134 | goto load_freelist; |
2281 | stat(s, CPU_PARTIAL_ALLOC); | ||
2282 | c->freelist = NULL; | ||
2283 | goto redo; | ||
2284 | } | 2135 | } |
2285 | 2136 | ||
2286 | freelist = new_slab_objects(s, gfpflags, node, &c); | 2137 | page = new_slab(s, gfpflags, node); |
2287 | 2138 | ||
2288 | if (unlikely(!freelist)) { | 2139 | if (page) { |
2289 | if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) | 2140 | c = __this_cpu_ptr(s->cpu_slab); |
2290 | slab_out_of_memory(s, gfpflags, node); | 2141 | if (c->page) |
2142 | flush_slab(s, c); | ||
2291 | 2143 | ||
2292 | local_irq_restore(flags); | 2144 | /* |
2293 | return NULL; | 2145 | * No other reference to the page yet so we can |
2294 | } | 2146 | * muck around with it freely without cmpxchg |
2147 | */ | ||
2148 | object = page->freelist; | ||
2149 | page->freelist = NULL; | ||
2150 | page->inuse = page->objects; | ||
2295 | 2151 | ||
2296 | page = c->page; | 2152 | stat(s, ALLOC_SLAB); |
2297 | if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags))) | 2153 | c->node = page_to_nid(page); |
2154 | c->page = page; | ||
2155 | |||
2156 | if (kmem_cache_debug(s)) | ||
2157 | goto debug; | ||
2298 | goto load_freelist; | 2158 | goto load_freelist; |
2159 | } | ||
2160 | if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) | ||
2161 | slab_out_of_memory(s, gfpflags, node); | ||
2162 | local_irq_restore(flags); | ||
2163 | return NULL; | ||
2299 | 2164 | ||
2300 | /* Only entered in the debug case */ | 2165 | debug: |
2301 | if (kmem_cache_debug(s) && !alloc_debug_processing(s, page, freelist, addr)) | 2166 | if (!object || !alloc_debug_processing(s, page, object, addr)) |
2302 | goto new_slab; /* Slab failed checks. Next slab needed */ | 2167 | goto new_slab; |
2303 | 2168 | ||
2304 | deactivate_slab(s, page, get_freepointer(s, freelist)); | 2169 | c->freelist = get_freepointer(s, object); |
2170 | deactivate_slab(s, c); | ||
2305 | c->page = NULL; | 2171 | c->page = NULL; |
2306 | c->freelist = NULL; | 2172 | c->node = NUMA_NO_NODE; |
2307 | local_irq_restore(flags); | 2173 | local_irq_restore(flags); |
2308 | return freelist; | 2174 | return object; |
2309 | } | 2175 | } |
2310 | 2176 | ||
2311 | /* | 2177 | /* |
@@ -2318,18 +2184,16 @@ new_slab: | |||
2318 | * | 2184 | * |
2319 | * Otherwise we can simply pick the next object from the lockless free list. | 2185 | * Otherwise we can simply pick the next object from the lockless free list. |
2320 | */ | 2186 | */ |
2321 | static __always_inline void *slab_alloc_node(struct kmem_cache *s, | 2187 | static __always_inline void *slab_alloc(struct kmem_cache *s, |
2322 | gfp_t gfpflags, int node, unsigned long addr) | 2188 | gfp_t gfpflags, int node, unsigned long addr) |
2323 | { | 2189 | { |
2324 | void **object; | 2190 | void **object; |
2325 | struct kmem_cache_cpu *c; | 2191 | struct kmem_cache_cpu *c; |
2326 | struct page *page; | ||
2327 | unsigned long tid; | 2192 | unsigned long tid; |
2328 | 2193 | ||
2329 | if (slab_pre_alloc_hook(s, gfpflags)) | 2194 | if (slab_pre_alloc_hook(s, gfpflags)) |
2330 | return NULL; | 2195 | return NULL; |
2331 | 2196 | ||
2332 | s = memcg_kmem_get_cache(s, gfpflags); | ||
2333 | redo: | 2197 | redo: |
2334 | 2198 | ||
2335 | /* | 2199 | /* |
@@ -2350,13 +2214,11 @@ redo: | |||
2350 | barrier(); | 2214 | barrier(); |
2351 | 2215 | ||
2352 | object = c->freelist; | 2216 | object = c->freelist; |
2353 | page = c->page; | 2217 | if (unlikely(!object || !node_match(c, node))) |
2354 | if (unlikely(!object || !node_match(page, node))) | 2218 | |
2355 | object = __slab_alloc(s, gfpflags, node, addr, c); | 2219 | object = __slab_alloc(s, gfpflags, node, addr, c); |
2356 | 2220 | ||
2357 | else { | 2221 | else { |
2358 | void *next_object = get_freepointer_safe(s, object); | ||
2359 | |||
2360 | /* | 2222 | /* |
2361 | * The cmpxchg will only match if there was no additional | 2223 | * The cmpxchg will only match if there was no additional |
2362 | * operation and if we are on the right processor. | 2224 | * operation and if we are on the right processor. |
@@ -2369,37 +2231,30 @@ redo: | |||
2369 | * Since this is without lock semantics the protection is only against | 2231 | * Since this is without lock semantics the protection is only against |
2370 | * code executing on this cpu *not* from access by other cpus. | 2232 | * code executing on this cpu *not* from access by other cpus. |
2371 | */ | 2233 | */ |
2372 | if (unlikely(!this_cpu_cmpxchg_double( | 2234 | if (unlikely(!irqsafe_cpu_cmpxchg_double( |
2373 | s->cpu_slab->freelist, s->cpu_slab->tid, | 2235 | s->cpu_slab->freelist, s->cpu_slab->tid, |
2374 | object, tid, | 2236 | object, tid, |
2375 | next_object, next_tid(tid)))) { | 2237 | get_freepointer_safe(s, object), next_tid(tid)))) { |
2376 | 2238 | ||
2377 | note_cmpxchg_failure("slab_alloc", s, tid); | 2239 | note_cmpxchg_failure("slab_alloc", s, tid); |
2378 | goto redo; | 2240 | goto redo; |
2379 | } | 2241 | } |
2380 | prefetch_freepointer(s, next_object); | ||
2381 | stat(s, ALLOC_FASTPATH); | 2242 | stat(s, ALLOC_FASTPATH); |
2382 | } | 2243 | } |
2383 | 2244 | ||
2384 | if (unlikely(gfpflags & __GFP_ZERO) && object) | 2245 | if (unlikely(gfpflags & __GFP_ZERO) && object) |
2385 | memset(object, 0, s->object_size); | 2246 | memset(object, 0, s->objsize); |
2386 | 2247 | ||
2387 | slab_post_alloc_hook(s, gfpflags, object); | 2248 | slab_post_alloc_hook(s, gfpflags, object); |
2388 | 2249 | ||
2389 | return object; | 2250 | return object; |
2390 | } | 2251 | } |
2391 | 2252 | ||
2392 | static __always_inline void *slab_alloc(struct kmem_cache *s, | ||
2393 | gfp_t gfpflags, unsigned long addr) | ||
2394 | { | ||
2395 | return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr); | ||
2396 | } | ||
2397 | |||
2398 | void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) | 2253 | void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) |
2399 | { | 2254 | { |
2400 | void *ret = slab_alloc(s, gfpflags, _RET_IP_); | 2255 | void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); |
2401 | 2256 | ||
2402 | trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags); | 2257 | trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); |
2403 | 2258 | ||
2404 | return ret; | 2259 | return ret; |
2405 | } | 2260 | } |
@@ -2408,7 +2263,7 @@ EXPORT_SYMBOL(kmem_cache_alloc); | |||
2408 | #ifdef CONFIG_TRACING | 2263 | #ifdef CONFIG_TRACING |
2409 | void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) | 2264 | void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) |
2410 | { | 2265 | { |
2411 | void *ret = slab_alloc(s, gfpflags, _RET_IP_); | 2266 | void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); |
2412 | trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); | 2267 | trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); |
2413 | return ret; | 2268 | return ret; |
2414 | } | 2269 | } |
@@ -2426,10 +2281,10 @@ EXPORT_SYMBOL(kmalloc_order_trace); | |||
2426 | #ifdef CONFIG_NUMA | 2281 | #ifdef CONFIG_NUMA |
2427 | void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) | 2282 | void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) |
2428 | { | 2283 | { |
2429 | void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_); | 2284 | void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); |
2430 | 2285 | ||
2431 | trace_kmem_cache_alloc_node(_RET_IP_, ret, | 2286 | trace_kmem_cache_alloc_node(_RET_IP_, ret, |
2432 | s->object_size, s->size, gfpflags, node); | 2287 | s->objsize, s->size, gfpflags, node); |
2433 | 2288 | ||
2434 | return ret; | 2289 | return ret; |
2435 | } | 2290 | } |
@@ -2440,7 +2295,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s, | |||
2440 | gfp_t gfpflags, | 2295 | gfp_t gfpflags, |
2441 | int node, size_t size) | 2296 | int node, size_t size) |
2442 | { | 2297 | { |
2443 | void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_); | 2298 | void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); |
2444 | 2299 | ||
2445 | trace_kmalloc_node(_RET_IP_, ret, | 2300 | trace_kmalloc_node(_RET_IP_, ret, |
2446 | size, s->size, gfpflags, node); | 2301 | size, s->size, gfpflags, node); |
@@ -2464,6 +2319,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
2464 | void *prior; | 2319 | void *prior; |
2465 | void **object = (void *)x; | 2320 | void **object = (void *)x; |
2466 | int was_frozen; | 2321 | int was_frozen; |
2322 | int inuse; | ||
2467 | struct page new; | 2323 | struct page new; |
2468 | unsigned long counters; | 2324 | unsigned long counters; |
2469 | struct kmem_cache_node *n = NULL; | 2325 | struct kmem_cache_node *n = NULL; |
@@ -2471,46 +2327,29 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
2471 | 2327 | ||
2472 | stat(s, FREE_SLOWPATH); | 2328 | stat(s, FREE_SLOWPATH); |
2473 | 2329 | ||
2474 | if (kmem_cache_debug(s) && | 2330 | if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr)) |
2475 | !(n = free_debug_processing(s, page, x, addr, &flags))) | ||
2476 | return; | 2331 | return; |
2477 | 2332 | ||
2478 | do { | 2333 | do { |
2479 | if (unlikely(n)) { | ||
2480 | spin_unlock_irqrestore(&n->list_lock, flags); | ||
2481 | n = NULL; | ||
2482 | } | ||
2483 | prior = page->freelist; | 2334 | prior = page->freelist; |
2484 | counters = page->counters; | 2335 | counters = page->counters; |
2485 | set_freepointer(s, object, prior); | 2336 | set_freepointer(s, object, prior); |
2486 | new.counters = counters; | 2337 | new.counters = counters; |
2487 | was_frozen = new.frozen; | 2338 | was_frozen = new.frozen; |
2488 | new.inuse--; | 2339 | new.inuse--; |
2489 | if ((!new.inuse || !prior) && !was_frozen) { | 2340 | if ((!new.inuse || !prior) && !was_frozen && !n) { |
2490 | 2341 | n = get_node(s, page_to_nid(page)); | |
2491 | if (!kmem_cache_debug(s) && !prior) | 2342 | /* |
2492 | 2343 | * Speculatively acquire the list_lock. | |
2493 | /* | 2344 | * If the cmpxchg does not succeed then we may |
2494 | * Slab was on no list before and will be partially empty | 2345 | * drop the list_lock without any processing. |
2495 | * We can defer the list move and instead freeze it. | 2346 | * |
2496 | */ | 2347 | * Otherwise the list_lock will synchronize with |
2497 | new.frozen = 1; | 2348 | * other processors updating the list of slabs. |
2498 | 2349 | */ | |
2499 | else { /* Needs to be taken off a list */ | 2350 | spin_lock_irqsave(&n->list_lock, flags); |
2500 | |||
2501 | n = get_node(s, page_to_nid(page)); | ||
2502 | /* | ||
2503 | * Speculatively acquire the list_lock. | ||
2504 | * If the cmpxchg does not succeed then we may | ||
2505 | * drop the list_lock without any processing. | ||
2506 | * | ||
2507 | * Otherwise the list_lock will synchronize with | ||
2508 | * other processors updating the list of slabs. | ||
2509 | */ | ||
2510 | spin_lock_irqsave(&n->list_lock, flags); | ||
2511 | |||
2512 | } | ||
2513 | } | 2351 | } |
2352 | inuse = new.inuse; | ||
2514 | 2353 | ||
2515 | } while (!cmpxchg_double_slab(s, page, | 2354 | } while (!cmpxchg_double_slab(s, page, |
2516 | prior, counters, | 2355 | prior, counters, |
@@ -2518,16 +2357,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
2518 | "__slab_free")); | 2357 | "__slab_free")); |
2519 | 2358 | ||
2520 | if (likely(!n)) { | 2359 | if (likely(!n)) { |
2521 | 2360 | /* | |
2522 | /* | ||
2523 | * If we just froze the page then put it onto the | ||
2524 | * per cpu partial list. | ||
2525 | */ | ||
2526 | if (new.frozen && !was_frozen) { | ||
2527 | put_cpu_partial(s, page, 1); | ||
2528 | stat(s, CPU_PARTIAL_FREE); | ||
2529 | } | ||
2530 | /* | ||
2531 | * The list lock was not taken therefore no list | 2361 | * The list lock was not taken therefore no list |
2532 | * activity can be necessary. | 2362 | * activity can be necessary. |
2533 | */ | 2363 | */ |
@@ -2536,17 +2366,25 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
2536 | return; | 2366 | return; |
2537 | } | 2367 | } |
2538 | 2368 | ||
2539 | if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) | ||
2540 | goto slab_empty; | ||
2541 | |||
2542 | /* | 2369 | /* |
2543 | * Objects left in the slab. If it was not on the partial list before | 2370 | * was_frozen may have been set after we acquired the list_lock in |
2544 | * then add it. | 2371 | * an earlier loop. So we need to check it here again. |
2545 | */ | 2372 | */ |
2546 | if (kmem_cache_debug(s) && unlikely(!prior)) { | 2373 | if (was_frozen) |
2547 | remove_full(s, page); | 2374 | stat(s, FREE_FROZEN); |
2548 | add_partial(n, page, DEACTIVATE_TO_TAIL); | 2375 | else { |
2549 | stat(s, FREE_ADD_PARTIAL); | 2376 | if (unlikely(!inuse && n->nr_partial > s->min_partial)) |
2377 | goto slab_empty; | ||
2378 | |||
2379 | /* | ||
2380 | * Objects left in the slab. If it was not on the partial list before | ||
2381 | * then add it. | ||
2382 | */ | ||
2383 | if (unlikely(!prior)) { | ||
2384 | remove_full(s, page); | ||
2385 | add_partial(n, page, 1); | ||
2386 | stat(s, FREE_ADD_PARTIAL); | ||
2387 | } | ||
2550 | } | 2388 | } |
2551 | spin_unlock_irqrestore(&n->list_lock, flags); | 2389 | spin_unlock_irqrestore(&n->list_lock, flags); |
2552 | return; | 2390 | return; |
@@ -2588,6 +2426,7 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
2588 | slab_free_hook(s, x); | 2426 | slab_free_hook(s, x); |
2589 | 2427 | ||
2590 | redo: | 2428 | redo: |
2429 | |||
2591 | /* | 2430 | /* |
2592 | * Determine the currently cpus per cpu slab. | 2431 | * Determine the currently cpus per cpu slab. |
2593 | * The cpu may change afterward. However that does not matter since | 2432 | * The cpu may change afterward. However that does not matter since |
@@ -2602,7 +2441,7 @@ redo: | |||
2602 | if (likely(page == c->page)) { | 2441 | if (likely(page == c->page)) { |
2603 | set_freepointer(s, object, c->freelist); | 2442 | set_freepointer(s, object, c->freelist); |
2604 | 2443 | ||
2605 | if (unlikely(!this_cpu_cmpxchg_double( | 2444 | if (unlikely(!irqsafe_cpu_cmpxchg_double( |
2606 | s->cpu_slab->freelist, s->cpu_slab->tid, | 2445 | s->cpu_slab->freelist, s->cpu_slab->tid, |
2607 | c->freelist, tid, | 2446 | c->freelist, tid, |
2608 | object, next_tid(tid)))) { | 2447 | object, next_tid(tid)))) { |
@@ -2618,10 +2457,12 @@ redo: | |||
2618 | 2457 | ||
2619 | void kmem_cache_free(struct kmem_cache *s, void *x) | 2458 | void kmem_cache_free(struct kmem_cache *s, void *x) |
2620 | { | 2459 | { |
2621 | s = cache_from_obj(s, x); | 2460 | struct page *page; |
2622 | if (!s) | 2461 | |
2623 | return; | 2462 | page = virt_to_head_page(x); |
2624 | slab_free(s, virt_to_head_page(x), x, _RET_IP_); | 2463 | |
2464 | slab_free(s, page, x, _RET_IP_); | ||
2465 | |||
2625 | trace_kmem_cache_free(_RET_IP_, x); | 2466 | trace_kmem_cache_free(_RET_IP_, x); |
2626 | } | 2467 | } |
2627 | EXPORT_SYMBOL(kmem_cache_free); | 2468 | EXPORT_SYMBOL(kmem_cache_free); |
@@ -2759,8 +2600,34 @@ static inline int calculate_order(int size, int reserved) | |||
2759 | return -ENOSYS; | 2600 | return -ENOSYS; |
2760 | } | 2601 | } |
2761 | 2602 | ||
2603 | /* | ||
2604 | * Figure out what the alignment of the objects will be. | ||
2605 | */ | ||
2606 | static unsigned long calculate_alignment(unsigned long flags, | ||
2607 | unsigned long align, unsigned long size) | ||
2608 | { | ||
2609 | /* | ||
2610 | * If the user wants hardware cache aligned objects then follow that | ||
2611 | * suggestion if the object is sufficiently large. | ||
2612 | * | ||
2613 | * The hardware cache alignment cannot override the specified | ||
2614 | * alignment though. If that is greater then use it. | ||
2615 | */ | ||
2616 | if (flags & SLAB_HWCACHE_ALIGN) { | ||
2617 | unsigned long ralign = cache_line_size(); | ||
2618 | while (size <= ralign / 2) | ||
2619 | ralign /= 2; | ||
2620 | align = max(align, ralign); | ||
2621 | } | ||
2622 | |||
2623 | if (align < ARCH_SLAB_MINALIGN) | ||
2624 | align = ARCH_SLAB_MINALIGN; | ||
2625 | |||
2626 | return ALIGN(align, sizeof(void *)); | ||
2627 | } | ||
2628 | |||
2762 | static void | 2629 | static void |
2763 | init_kmem_cache_node(struct kmem_cache_node *n) | 2630 | init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) |
2764 | { | 2631 | { |
2765 | n->nr_partial = 0; | 2632 | n->nr_partial = 0; |
2766 | spin_lock_init(&n->list_lock); | 2633 | spin_lock_init(&n->list_lock); |
@@ -2823,17 +2690,17 @@ static void early_kmem_cache_node_alloc(int node) | |||
2823 | n = page->freelist; | 2690 | n = page->freelist; |
2824 | BUG_ON(!n); | 2691 | BUG_ON(!n); |
2825 | page->freelist = get_freepointer(kmem_cache_node, n); | 2692 | page->freelist = get_freepointer(kmem_cache_node, n); |
2826 | page->inuse = 1; | 2693 | page->inuse++; |
2827 | page->frozen = 0; | 2694 | page->frozen = 0; |
2828 | kmem_cache_node->node[node] = n; | 2695 | kmem_cache_node->node[node] = n; |
2829 | #ifdef CONFIG_SLUB_DEBUG | 2696 | #ifdef CONFIG_SLUB_DEBUG |
2830 | init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); | 2697 | init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); |
2831 | init_tracking(kmem_cache_node, n); | 2698 | init_tracking(kmem_cache_node, n); |
2832 | #endif | 2699 | #endif |
2833 | init_kmem_cache_node(n); | 2700 | init_kmem_cache_node(n, kmem_cache_node); |
2834 | inc_slabs_node(kmem_cache_node, node, page->objects); | 2701 | inc_slabs_node(kmem_cache_node, node, page->objects); |
2835 | 2702 | ||
2836 | add_partial(n, page, DEACTIVATE_TO_HEAD); | 2703 | add_partial(n, page, 0); |
2837 | } | 2704 | } |
2838 | 2705 | ||
2839 | static void free_kmem_cache_nodes(struct kmem_cache *s) | 2706 | static void free_kmem_cache_nodes(struct kmem_cache *s) |
@@ -2870,7 +2737,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s) | |||
2870 | } | 2737 | } |
2871 | 2738 | ||
2872 | s->node[node] = n; | 2739 | s->node[node] = n; |
2873 | init_kmem_cache_node(n); | 2740 | init_kmem_cache_node(n, s); |
2874 | } | 2741 | } |
2875 | return 1; | 2742 | return 1; |
2876 | } | 2743 | } |
@@ -2891,7 +2758,8 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min) | |||
2891 | static int calculate_sizes(struct kmem_cache *s, int forced_order) | 2758 | static int calculate_sizes(struct kmem_cache *s, int forced_order) |
2892 | { | 2759 | { |
2893 | unsigned long flags = s->flags; | 2760 | unsigned long flags = s->flags; |
2894 | unsigned long size = s->object_size; | 2761 | unsigned long size = s->objsize; |
2762 | unsigned long align = s->align; | ||
2895 | int order; | 2763 | int order; |
2896 | 2764 | ||
2897 | /* | 2765 | /* |
@@ -2919,7 +2787,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
2919 | * end of the object and the free pointer. If not then add an | 2787 | * end of the object and the free pointer. If not then add an |
2920 | * additional word to have some bytes to store Redzone information. | 2788 | * additional word to have some bytes to store Redzone information. |
2921 | */ | 2789 | */ |
2922 | if ((flags & SLAB_RED_ZONE) && size == s->object_size) | 2790 | if ((flags & SLAB_RED_ZONE) && size == s->objsize) |
2923 | size += sizeof(void *); | 2791 | size += sizeof(void *); |
2924 | #endif | 2792 | #endif |
2925 | 2793 | ||
@@ -2963,11 +2831,19 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
2963 | #endif | 2831 | #endif |
2964 | 2832 | ||
2965 | /* | 2833 | /* |
2834 | * Determine the alignment based on various parameters that the | ||
2835 | * user specified and the dynamic determination of cache line size | ||
2836 | * on bootup. | ||
2837 | */ | ||
2838 | align = calculate_alignment(flags, align, s->objsize); | ||
2839 | s->align = align; | ||
2840 | |||
2841 | /* | ||
2966 | * SLUB stores one object immediately after another beginning from | 2842 | * SLUB stores one object immediately after another beginning from |
2967 | * offset 0. In order to align the objects we have to simply size | 2843 | * offset 0. In order to align the objects we have to simply size |
2968 | * each object to conform to the alignment. | 2844 | * each object to conform to the alignment. |
2969 | */ | 2845 | */ |
2970 | size = ALIGN(size, s->align); | 2846 | size = ALIGN(size, align); |
2971 | s->size = size; | 2847 | s->size = size; |
2972 | if (forced_order >= 0) | 2848 | if (forced_order >= 0) |
2973 | order = forced_order; | 2849 | order = forced_order; |
@@ -2996,11 +2872,20 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
2996 | s->max = s->oo; | 2872 | s->max = s->oo; |
2997 | 2873 | ||
2998 | return !!oo_objects(s->oo); | 2874 | return !!oo_objects(s->oo); |
2875 | |||
2999 | } | 2876 | } |
3000 | 2877 | ||
3001 | static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) | 2878 | static int kmem_cache_open(struct kmem_cache *s, |
2879 | const char *name, size_t size, | ||
2880 | size_t align, unsigned long flags, | ||
2881 | void (*ctor)(void *)) | ||
3002 | { | 2882 | { |
3003 | s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); | 2883 | memset(s, 0, kmem_size); |
2884 | s->name = name; | ||
2885 | s->ctor = ctor; | ||
2886 | s->objsize = size; | ||
2887 | s->align = align; | ||
2888 | s->flags = kmem_cache_flags(size, flags, name, ctor); | ||
3004 | s->reserved = 0; | 2889 | s->reserved = 0; |
3005 | 2890 | ||
3006 | if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) | 2891 | if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) |
@@ -3013,7 +2898,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) | |||
3013 | * Disable debugging flags that store metadata if the min slab | 2898 | * Disable debugging flags that store metadata if the min slab |
3014 | * order increased. | 2899 | * order increased. |
3015 | */ | 2900 | */ |
3016 | if (get_order(s->size) > get_order(s->object_size)) { | 2901 | if (get_order(s->size) > get_order(s->objsize)) { |
3017 | s->flags &= ~DEBUG_METADATA_FLAGS; | 2902 | s->flags &= ~DEBUG_METADATA_FLAGS; |
3018 | s->offset = 0; | 2903 | s->offset = 0; |
3019 | if (!calculate_sizes(s, -1)) | 2904 | if (!calculate_sizes(s, -1)) |
@@ -3021,8 +2906,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) | |||
3021 | } | 2906 | } |
3022 | } | 2907 | } |
3023 | 2908 | ||
3024 | #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ | 2909 | #ifdef CONFIG_CMPXCHG_DOUBLE |
3025 | defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) | ||
3026 | if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0) | 2910 | if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0) |
3027 | /* Enable fast mode */ | 2911 | /* Enable fast mode */ |
3028 | s->flags |= __CMPXCHG_DOUBLE; | 2912 | s->flags |= __CMPXCHG_DOUBLE; |
@@ -3032,36 +2916,8 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) | |||
3032 | * The larger the object size is, the more pages we want on the partial | 2916 | * The larger the object size is, the more pages we want on the partial |
3033 | * list to avoid pounding the page allocator excessively. | 2917 | * list to avoid pounding the page allocator excessively. |
3034 | */ | 2918 | */ |
3035 | set_min_partial(s, ilog2(s->size) / 2); | 2919 | set_min_partial(s, ilog2(s->size)); |
3036 | 2920 | s->refcount = 1; | |
3037 | /* | ||
3038 | * cpu_partial determined the maximum number of objects kept in the | ||
3039 | * per cpu partial lists of a processor. | ||
3040 | * | ||
3041 | * Per cpu partial lists mainly contain slabs that just have one | ||
3042 | * object freed. If they are used for allocation then they can be | ||
3043 | * filled up again with minimal effort. The slab will never hit the | ||
3044 | * per node partial lists and therefore no locking will be required. | ||
3045 | * | ||
3046 | * This setting also determines | ||
3047 | * | ||
3048 | * A) The number of objects from per cpu partial slabs dumped to the | ||
3049 | * per node list when we reach the limit. | ||
3050 | * B) The number of objects in cpu partial slabs to extract from the | ||
3051 | * per node list when we run out of per cpu objects. We only fetch 50% | ||
3052 | * to keep some capacity around for frees. | ||
3053 | */ | ||
3054 | if (kmem_cache_debug(s)) | ||
3055 | s->cpu_partial = 0; | ||
3056 | else if (s->size >= PAGE_SIZE) | ||
3057 | s->cpu_partial = 2; | ||
3058 | else if (s->size >= 1024) | ||
3059 | s->cpu_partial = 6; | ||
3060 | else if (s->size >= 256) | ||
3061 | s->cpu_partial = 13; | ||
3062 | else | ||
3063 | s->cpu_partial = 30; | ||
3064 | |||
3065 | #ifdef CONFIG_NUMA | 2921 | #ifdef CONFIG_NUMA |
3066 | s->remote_node_defrag_ratio = 1000; | 2922 | s->remote_node_defrag_ratio = 1000; |
3067 | #endif | 2923 | #endif |
@@ -3069,17 +2925,26 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) | |||
3069 | goto error; | 2925 | goto error; |
3070 | 2926 | ||
3071 | if (alloc_kmem_cache_cpus(s)) | 2927 | if (alloc_kmem_cache_cpus(s)) |
3072 | return 0; | 2928 | return 1; |
3073 | 2929 | ||
3074 | free_kmem_cache_nodes(s); | 2930 | free_kmem_cache_nodes(s); |
3075 | error: | 2931 | error: |
3076 | if (flags & SLAB_PANIC) | 2932 | if (flags & SLAB_PANIC) |
3077 | panic("Cannot create slab %s size=%lu realsize=%u " | 2933 | panic("Cannot create slab %s size=%lu realsize=%u " |
3078 | "order=%u offset=%u flags=%lx\n", | 2934 | "order=%u offset=%u flags=%lx\n", |
3079 | s->name, (unsigned long)s->size, s->size, oo_order(s->oo), | 2935 | s->name, (unsigned long)size, s->size, oo_order(s->oo), |
3080 | s->offset, flags); | 2936 | s->offset, flags); |
3081 | return -EINVAL; | 2937 | return 0; |
2938 | } | ||
2939 | |||
2940 | /* | ||
2941 | * Determine the size of a slab object | ||
2942 | */ | ||
2943 | unsigned int kmem_cache_size(struct kmem_cache *s) | ||
2944 | { | ||
2945 | return s->objsize; | ||
3082 | } | 2946 | } |
2947 | EXPORT_SYMBOL(kmem_cache_size); | ||
3083 | 2948 | ||
3084 | static void list_slab_objects(struct kmem_cache *s, struct page *page, | 2949 | static void list_slab_objects(struct kmem_cache *s, struct page *page, |
3085 | const char *text) | 2950 | const char *text) |
@@ -3091,7 +2956,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, | |||
3091 | sizeof(long), GFP_ATOMIC); | 2956 | sizeof(long), GFP_ATOMIC); |
3092 | if (!map) | 2957 | if (!map) |
3093 | return; | 2958 | return; |
3094 | slab_err(s, page, text, s->name); | 2959 | slab_err(s, page, "%s", text); |
3095 | slab_lock(page); | 2960 | slab_lock(page); |
3096 | 2961 | ||
3097 | get_map(s, page, map); | 2962 | get_map(s, page, map); |
@@ -3110,22 +2975,23 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, | |||
3110 | 2975 | ||
3111 | /* | 2976 | /* |
3112 | * Attempt to free all partial slabs on a node. | 2977 | * Attempt to free all partial slabs on a node. |
3113 | * This is called from kmem_cache_close(). We must be the last thread | ||
3114 | * using the cache and therefore we do not need to lock anymore. | ||
3115 | */ | 2978 | */ |
3116 | static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) | 2979 | static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) |
3117 | { | 2980 | { |
2981 | unsigned long flags; | ||
3118 | struct page *page, *h; | 2982 | struct page *page, *h; |
3119 | 2983 | ||
2984 | spin_lock_irqsave(&n->list_lock, flags); | ||
3120 | list_for_each_entry_safe(page, h, &n->partial, lru) { | 2985 | list_for_each_entry_safe(page, h, &n->partial, lru) { |
3121 | if (!page->inuse) { | 2986 | if (!page->inuse) { |
3122 | remove_partial(n, page); | 2987 | remove_partial(n, page); |
3123 | discard_slab(s, page); | 2988 | discard_slab(s, page); |
3124 | } else { | 2989 | } else { |
3125 | list_slab_objects(s, page, | 2990 | list_slab_objects(s, page, |
3126 | "Objects remaining in %s on kmem_cache_close()"); | 2991 | "Objects remaining on kmem_cache_close()"); |
3127 | } | 2992 | } |
3128 | } | 2993 | } |
2994 | spin_unlock_irqrestore(&n->list_lock, flags); | ||
3129 | } | 2995 | } |
3130 | 2996 | ||
3131 | /* | 2997 | /* |
@@ -3136,6 +3002,7 @@ static inline int kmem_cache_close(struct kmem_cache *s) | |||
3136 | int node; | 3002 | int node; |
3137 | 3003 | ||
3138 | flush_all(s); | 3004 | flush_all(s); |
3005 | free_percpu(s->cpu_slab); | ||
3139 | /* Attempt to free all objects */ | 3006 | /* Attempt to free all objects */ |
3140 | for_each_node_state(node, N_NORMAL_MEMORY) { | 3007 | for_each_node_state(node, N_NORMAL_MEMORY) { |
3141 | struct kmem_cache_node *n = get_node(s, node); | 3008 | struct kmem_cache_node *n = get_node(s, node); |
@@ -3144,31 +3011,32 @@ static inline int kmem_cache_close(struct kmem_cache *s) | |||
3144 | if (n->nr_partial || slabs_node(s, node)) | 3011 | if (n->nr_partial || slabs_node(s, node)) |
3145 | return 1; | 3012 | return 1; |
3146 | } | 3013 | } |
3147 | free_percpu(s->cpu_slab); | ||
3148 | free_kmem_cache_nodes(s); | 3014 | free_kmem_cache_nodes(s); |
3149 | return 0; | 3015 | return 0; |
3150 | } | 3016 | } |
3151 | 3017 | ||
3152 | int __kmem_cache_shutdown(struct kmem_cache *s) | 3018 | /* |
3153 | { | 3019 | * Close a cache and release the kmem_cache structure |
3154 | int rc = kmem_cache_close(s); | 3020 | * (must be used for caches created using kmem_cache_create) |
3155 | 3021 | */ | |
3156 | if (!rc) { | 3022 | void kmem_cache_destroy(struct kmem_cache *s) |
3157 | /* | 3023 | { |
3158 | * We do the same lock strategy around sysfs_slab_add, see | 3024 | down_write(&slub_lock); |
3159 | * __kmem_cache_create. Because this is pretty much the last | 3025 | s->refcount--; |
3160 | * operation we do and the lock will be released shortly after | 3026 | if (!s->refcount) { |
3161 | * that in slab_common.c, we could just move sysfs_slab_remove | 3027 | list_del(&s->list); |
3162 | * to a later point in common code. We should do that when we | 3028 | if (kmem_cache_close(s)) { |
3163 | * have a common sysfs framework for all allocators. | 3029 | printk(KERN_ERR "SLUB %s: %s called for cache that " |
3164 | */ | 3030 | "still has objects.\n", s->name, __func__); |
3165 | mutex_unlock(&slab_mutex); | 3031 | dump_stack(); |
3032 | } | ||
3033 | if (s->flags & SLAB_DESTROY_BY_RCU) | ||
3034 | rcu_barrier(); | ||
3166 | sysfs_slab_remove(s); | 3035 | sysfs_slab_remove(s); |
3167 | mutex_lock(&slab_mutex); | ||
3168 | } | 3036 | } |
3169 | 3037 | up_write(&slub_lock); | |
3170 | return rc; | ||
3171 | } | 3038 | } |
3039 | EXPORT_SYMBOL(kmem_cache_destroy); | ||
3172 | 3040 | ||
3173 | /******************************************************************** | 3041 | /******************************************************************** |
3174 | * Kmalloc subsystem | 3042 | * Kmalloc subsystem |
@@ -3177,6 +3045,8 @@ int __kmem_cache_shutdown(struct kmem_cache *s) | |||
3177 | struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; | 3045 | struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; |
3178 | EXPORT_SYMBOL(kmalloc_caches); | 3046 | EXPORT_SYMBOL(kmalloc_caches); |
3179 | 3047 | ||
3048 | static struct kmem_cache *kmem_cache; | ||
3049 | |||
3180 | #ifdef CONFIG_ZONE_DMA | 3050 | #ifdef CONFIG_ZONE_DMA |
3181 | static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; | 3051 | static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; |
3182 | #endif | 3052 | #endif |
@@ -3217,6 +3087,29 @@ static int __init setup_slub_nomerge(char *str) | |||
3217 | 3087 | ||
3218 | __setup("slub_nomerge", setup_slub_nomerge); | 3088 | __setup("slub_nomerge", setup_slub_nomerge); |
3219 | 3089 | ||
3090 | static struct kmem_cache *__init create_kmalloc_cache(const char *name, | ||
3091 | int size, unsigned int flags) | ||
3092 | { | ||
3093 | struct kmem_cache *s; | ||
3094 | |||
3095 | s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); | ||
3096 | |||
3097 | /* | ||
3098 | * This function is called with IRQs disabled during early-boot on | ||
3099 | * single CPU so there's no need to take slub_lock here. | ||
3100 | */ | ||
3101 | if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN, | ||
3102 | flags, NULL)) | ||
3103 | goto panic; | ||
3104 | |||
3105 | list_add(&s->list, &slab_caches); | ||
3106 | return s; | ||
3107 | |||
3108 | panic: | ||
3109 | panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); | ||
3110 | return NULL; | ||
3111 | } | ||
3112 | |||
3220 | /* | 3113 | /* |
3221 | * Conversion table for small slabs sizes / 8 to the index in the | 3114 | * Conversion table for small slabs sizes / 8 to the index in the |
3222 | * kmalloc array. This is necessary for slabs < 192 since we have non power | 3115 | * kmalloc array. This is necessary for slabs < 192 since we have non power |
@@ -3288,7 +3181,7 @@ void *__kmalloc(size_t size, gfp_t flags) | |||
3288 | if (unlikely(ZERO_OR_NULL_PTR(s))) | 3181 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
3289 | return s; | 3182 | return s; |
3290 | 3183 | ||
3291 | ret = slab_alloc(s, flags, _RET_IP_); | 3184 | ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_); |
3292 | 3185 | ||
3293 | trace_kmalloc(_RET_IP_, ret, size, s->size, flags); | 3186 | trace_kmalloc(_RET_IP_, ret, size, s->size, flags); |
3294 | 3187 | ||
@@ -3302,7 +3195,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) | |||
3302 | struct page *page; | 3195 | struct page *page; |
3303 | void *ptr = NULL; | 3196 | void *ptr = NULL; |
3304 | 3197 | ||
3305 | flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG; | 3198 | flags |= __GFP_COMP | __GFP_NOTRACK; |
3306 | page = alloc_pages_node(node, flags, get_order(size)); | 3199 | page = alloc_pages_node(node, flags, get_order(size)); |
3307 | if (page) | 3200 | if (page) |
3308 | ptr = page_address(page); | 3201 | ptr = page_address(page); |
@@ -3331,7 +3224,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) | |||
3331 | if (unlikely(ZERO_OR_NULL_PTR(s))) | 3224 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
3332 | return s; | 3225 | return s; |
3333 | 3226 | ||
3334 | ret = slab_alloc_node(s, flags, node, _RET_IP_); | 3227 | ret = slab_alloc(s, flags, node, _RET_IP_); |
3335 | 3228 | ||
3336 | trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); | 3229 | trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); |
3337 | 3230 | ||
@@ -3354,7 +3247,7 @@ size_t ksize(const void *object) | |||
3354 | return PAGE_SIZE << compound_order(page); | 3247 | return PAGE_SIZE << compound_order(page); |
3355 | } | 3248 | } |
3356 | 3249 | ||
3357 | return slab_ksize(page->slab_cache); | 3250 | return slab_ksize(page->slab); |
3358 | } | 3251 | } |
3359 | EXPORT_SYMBOL(ksize); | 3252 | EXPORT_SYMBOL(ksize); |
3360 | 3253 | ||
@@ -3379,8 +3272,8 @@ bool verify_mem_not_deleted(const void *x) | |||
3379 | } | 3272 | } |
3380 | 3273 | ||
3381 | slab_lock(page); | 3274 | slab_lock(page); |
3382 | if (on_freelist(page->slab_cache, page, object)) { | 3275 | if (on_freelist(page->slab, page, object)) { |
3383 | object_err(page->slab_cache, page, object, "Object is on free-list"); | 3276 | object_err(page->slab, page, object, "Object is on free-list"); |
3384 | rv = false; | 3277 | rv = false; |
3385 | } else { | 3278 | } else { |
3386 | rv = true; | 3279 | rv = true; |
@@ -3408,10 +3301,10 @@ void kfree(const void *x) | |||
3408 | if (unlikely(!PageSlab(page))) { | 3301 | if (unlikely(!PageSlab(page))) { |
3409 | BUG_ON(!PageCompound(page)); | 3302 | BUG_ON(!PageCompound(page)); |
3410 | kmemleak_free(x); | 3303 | kmemleak_free(x); |
3411 | __free_memcg_kmem_pages(page, compound_order(page)); | 3304 | put_page(page); |
3412 | return; | 3305 | return; |
3413 | } | 3306 | } |
3414 | slab_free(page->slab_cache, page, object, _RET_IP_); | 3307 | slab_free(page->slab, page, object, _RET_IP_); |
3415 | } | 3308 | } |
3416 | EXPORT_SYMBOL(kfree); | 3309 | EXPORT_SYMBOL(kfree); |
3417 | 3310 | ||
@@ -3459,23 +3352,23 @@ int kmem_cache_shrink(struct kmem_cache *s) | |||
3459 | * list_lock. page->inuse here is the upper limit. | 3352 | * list_lock. page->inuse here is the upper limit. |
3460 | */ | 3353 | */ |
3461 | list_for_each_entry_safe(page, t, &n->partial, lru) { | 3354 | list_for_each_entry_safe(page, t, &n->partial, lru) { |
3462 | list_move(&page->lru, slabs_by_inuse + page->inuse); | 3355 | if (!page->inuse) { |
3463 | if (!page->inuse) | 3356 | remove_partial(n, page); |
3464 | n->nr_partial--; | 3357 | discard_slab(s, page); |
3358 | } else { | ||
3359 | list_move(&page->lru, | ||
3360 | slabs_by_inuse + page->inuse); | ||
3361 | } | ||
3465 | } | 3362 | } |
3466 | 3363 | ||
3467 | /* | 3364 | /* |
3468 | * Rebuild the partial list with the slabs filled up most | 3365 | * Rebuild the partial list with the slabs filled up most |
3469 | * first and the least used slabs at the end. | 3366 | * first and the least used slabs at the end. |
3470 | */ | 3367 | */ |
3471 | for (i = objects - 1; i > 0; i--) | 3368 | for (i = objects - 1; i >= 0; i--) |
3472 | list_splice(slabs_by_inuse + i, n->partial.prev); | 3369 | list_splice(slabs_by_inuse + i, n->partial.prev); |
3473 | 3370 | ||
3474 | spin_unlock_irqrestore(&n->list_lock, flags); | 3371 | spin_unlock_irqrestore(&n->list_lock, flags); |
3475 | |||
3476 | /* Release empty slabs */ | ||
3477 | list_for_each_entry_safe(page, t, slabs_by_inuse, lru) | ||
3478 | discard_slab(s, page); | ||
3479 | } | 3372 | } |
3480 | 3373 | ||
3481 | kfree(slabs_by_inuse); | 3374 | kfree(slabs_by_inuse); |
@@ -3488,10 +3381,10 @@ static int slab_mem_going_offline_callback(void *arg) | |||
3488 | { | 3381 | { |
3489 | struct kmem_cache *s; | 3382 | struct kmem_cache *s; |
3490 | 3383 | ||
3491 | mutex_lock(&slab_mutex); | 3384 | down_read(&slub_lock); |
3492 | list_for_each_entry(s, &slab_caches, list) | 3385 | list_for_each_entry(s, &slab_caches, list) |
3493 | kmem_cache_shrink(s); | 3386 | kmem_cache_shrink(s); |
3494 | mutex_unlock(&slab_mutex); | 3387 | up_read(&slub_lock); |
3495 | 3388 | ||
3496 | return 0; | 3389 | return 0; |
3497 | } | 3390 | } |
@@ -3503,7 +3396,7 @@ static void slab_mem_offline_callback(void *arg) | |||
3503 | struct memory_notify *marg = arg; | 3396 | struct memory_notify *marg = arg; |
3504 | int offline_node; | 3397 | int offline_node; |
3505 | 3398 | ||
3506 | offline_node = marg->status_change_nid_normal; | 3399 | offline_node = marg->status_change_nid; |
3507 | 3400 | ||
3508 | /* | 3401 | /* |
3509 | * If the node still has available memory. we need kmem_cache_node | 3402 | * If the node still has available memory. we need kmem_cache_node |
@@ -3512,7 +3405,7 @@ static void slab_mem_offline_callback(void *arg) | |||
3512 | if (offline_node < 0) | 3405 | if (offline_node < 0) |
3513 | return; | 3406 | return; |
3514 | 3407 | ||
3515 | mutex_lock(&slab_mutex); | 3408 | down_read(&slub_lock); |
3516 | list_for_each_entry(s, &slab_caches, list) { | 3409 | list_for_each_entry(s, &slab_caches, list) { |
3517 | n = get_node(s, offline_node); | 3410 | n = get_node(s, offline_node); |
3518 | if (n) { | 3411 | if (n) { |
@@ -3528,7 +3421,7 @@ static void slab_mem_offline_callback(void *arg) | |||
3528 | kmem_cache_free(kmem_cache_node, n); | 3421 | kmem_cache_free(kmem_cache_node, n); |
3529 | } | 3422 | } |
3530 | } | 3423 | } |
3531 | mutex_unlock(&slab_mutex); | 3424 | up_read(&slub_lock); |
3532 | } | 3425 | } |
3533 | 3426 | ||
3534 | static int slab_mem_going_online_callback(void *arg) | 3427 | static int slab_mem_going_online_callback(void *arg) |
@@ -3536,7 +3429,7 @@ static int slab_mem_going_online_callback(void *arg) | |||
3536 | struct kmem_cache_node *n; | 3429 | struct kmem_cache_node *n; |
3537 | struct kmem_cache *s; | 3430 | struct kmem_cache *s; |
3538 | struct memory_notify *marg = arg; | 3431 | struct memory_notify *marg = arg; |
3539 | int nid = marg->status_change_nid_normal; | 3432 | int nid = marg->status_change_nid; |
3540 | int ret = 0; | 3433 | int ret = 0; |
3541 | 3434 | ||
3542 | /* | 3435 | /* |
@@ -3551,7 +3444,7 @@ static int slab_mem_going_online_callback(void *arg) | |||
3551 | * allocate a kmem_cache_node structure in order to bring the node | 3444 | * allocate a kmem_cache_node structure in order to bring the node |
3552 | * online. | 3445 | * online. |
3553 | */ | 3446 | */ |
3554 | mutex_lock(&slab_mutex); | 3447 | down_read(&slub_lock); |
3555 | list_for_each_entry(s, &slab_caches, list) { | 3448 | list_for_each_entry(s, &slab_caches, list) { |
3556 | /* | 3449 | /* |
3557 | * XXX: kmem_cache_alloc_node will fallback to other nodes | 3450 | * XXX: kmem_cache_alloc_node will fallback to other nodes |
@@ -3563,11 +3456,11 @@ static int slab_mem_going_online_callback(void *arg) | |||
3563 | ret = -ENOMEM; | 3456 | ret = -ENOMEM; |
3564 | goto out; | 3457 | goto out; |
3565 | } | 3458 | } |
3566 | init_kmem_cache_node(n); | 3459 | init_kmem_cache_node(n, s); |
3567 | s->node[nid] = n; | 3460 | s->node[nid] = n; |
3568 | } | 3461 | } |
3569 | out: | 3462 | out: |
3570 | mutex_unlock(&slab_mutex); | 3463 | up_read(&slub_lock); |
3571 | return ret; | 3464 | return ret; |
3572 | } | 3465 | } |
3573 | 3466 | ||
@@ -3606,16 +3499,15 @@ static int slab_memory_callback(struct notifier_block *self, | |||
3606 | 3499 | ||
3607 | /* | 3500 | /* |
3608 | * Used for early kmem_cache structures that were allocated using | 3501 | * Used for early kmem_cache structures that were allocated using |
3609 | * the page allocator. Allocate them properly then fix up the pointers | 3502 | * the page allocator |
3610 | * that may be pointing to the wrong kmem_cache structure. | ||
3611 | */ | 3503 | */ |
3612 | 3504 | ||
3613 | static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) | 3505 | static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s) |
3614 | { | 3506 | { |
3615 | int node; | 3507 | int node; |
3616 | struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); | ||
3617 | 3508 | ||
3618 | memcpy(s, static_cache, kmem_cache->object_size); | 3509 | list_add(&s->list, &slab_caches); |
3510 | s->refcount = -1; | ||
3619 | 3511 | ||
3620 | for_each_node_state(node, N_NORMAL_MEMORY) { | 3512 | for_each_node_state(node, N_NORMAL_MEMORY) { |
3621 | struct kmem_cache_node *n = get_node(s, node); | 3513 | struct kmem_cache_node *n = get_node(s, node); |
@@ -3623,52 +3515,72 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) | |||
3623 | 3515 | ||
3624 | if (n) { | 3516 | if (n) { |
3625 | list_for_each_entry(p, &n->partial, lru) | 3517 | list_for_each_entry(p, &n->partial, lru) |
3626 | p->slab_cache = s; | 3518 | p->slab = s; |
3627 | 3519 | ||
3628 | #ifdef CONFIG_SLUB_DEBUG | 3520 | #ifdef CONFIG_SLUB_DEBUG |
3629 | list_for_each_entry(p, &n->full, lru) | 3521 | list_for_each_entry(p, &n->full, lru) |
3630 | p->slab_cache = s; | 3522 | p->slab = s; |
3631 | #endif | 3523 | #endif |
3632 | } | 3524 | } |
3633 | } | 3525 | } |
3634 | list_add(&s->list, &slab_caches); | ||
3635 | return s; | ||
3636 | } | 3526 | } |
3637 | 3527 | ||
3638 | void __init kmem_cache_init(void) | 3528 | void __init kmem_cache_init(void) |
3639 | { | 3529 | { |
3640 | static __initdata struct kmem_cache boot_kmem_cache, | ||
3641 | boot_kmem_cache_node; | ||
3642 | int i; | 3530 | int i; |
3643 | int caches = 2; | 3531 | int caches = 0; |
3532 | struct kmem_cache *temp_kmem_cache; | ||
3533 | int order; | ||
3534 | struct kmem_cache *temp_kmem_cache_node; | ||
3535 | unsigned long kmalloc_size; | ||
3644 | 3536 | ||
3645 | if (debug_guardpage_minorder()) | 3537 | kmem_size = offsetof(struct kmem_cache, node) + |
3646 | slub_max_order = 0; | 3538 | nr_node_ids * sizeof(struct kmem_cache_node *); |
3647 | 3539 | ||
3648 | kmem_cache_node = &boot_kmem_cache_node; | 3540 | /* Allocate two kmem_caches from the page allocator */ |
3649 | kmem_cache = &boot_kmem_cache; | 3541 | kmalloc_size = ALIGN(kmem_size, cache_line_size()); |
3542 | order = get_order(2 * kmalloc_size); | ||
3543 | kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); | ||
3650 | 3544 | ||
3651 | create_boot_cache(kmem_cache_node, "kmem_cache_node", | 3545 | /* |
3652 | sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN); | 3546 | * Must first have the slab cache available for the allocations of the |
3547 | * struct kmem_cache_node's. There is special bootstrap code in | ||
3548 | * kmem_cache_open for slab_state == DOWN. | ||
3549 | */ | ||
3550 | kmem_cache_node = (void *)kmem_cache + kmalloc_size; | ||
3551 | |||
3552 | kmem_cache_open(kmem_cache_node, "kmem_cache_node", | ||
3553 | sizeof(struct kmem_cache_node), | ||
3554 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); | ||
3653 | 3555 | ||
3654 | hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); | 3556 | hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); |
3655 | 3557 | ||
3656 | /* Able to allocate the per node structures */ | 3558 | /* Able to allocate the per node structures */ |
3657 | slab_state = PARTIAL; | 3559 | slab_state = PARTIAL; |
3658 | 3560 | ||
3659 | create_boot_cache(kmem_cache, "kmem_cache", | 3561 | temp_kmem_cache = kmem_cache; |
3660 | offsetof(struct kmem_cache, node) + | 3562 | kmem_cache_open(kmem_cache, "kmem_cache", kmem_size, |
3661 | nr_node_ids * sizeof(struct kmem_cache_node *), | 3563 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); |
3662 | SLAB_HWCACHE_ALIGN); | 3564 | kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); |
3663 | 3565 | memcpy(kmem_cache, temp_kmem_cache, kmem_size); | |
3664 | kmem_cache = bootstrap(&boot_kmem_cache); | ||
3665 | 3566 | ||
3666 | /* | 3567 | /* |
3667 | * Allocate kmem_cache_node properly from the kmem_cache slab. | 3568 | * Allocate kmem_cache_node properly from the kmem_cache slab. |
3668 | * kmem_cache_node is separately allocated so no need to | 3569 | * kmem_cache_node is separately allocated so no need to |
3669 | * update any list pointers. | 3570 | * update any list pointers. |
3670 | */ | 3571 | */ |
3671 | kmem_cache_node = bootstrap(&boot_kmem_cache_node); | 3572 | temp_kmem_cache_node = kmem_cache_node; |
3573 | |||
3574 | kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); | ||
3575 | memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size); | ||
3576 | |||
3577 | kmem_cache_bootstrap_fixup(kmem_cache_node); | ||
3578 | |||
3579 | caches++; | ||
3580 | kmem_cache_bootstrap_fixup(kmem_cache); | ||
3581 | caches++; | ||
3582 | /* Free temporary boot structure */ | ||
3583 | free_pages((unsigned long)temp_kmem_cache, order); | ||
3672 | 3584 | ||
3673 | /* Now we can use the kmem_cache to allocate kmalloc slabs */ | 3585 | /* Now we can use the kmem_cache to allocate kmalloc slabs */ |
3674 | 3586 | ||
@@ -3756,11 +3668,11 @@ void __init kmem_cache_init(void) | |||
3756 | 3668 | ||
3757 | if (s && s->size) { | 3669 | if (s && s->size) { |
3758 | char *name = kasprintf(GFP_NOWAIT, | 3670 | char *name = kasprintf(GFP_NOWAIT, |
3759 | "dma-kmalloc-%d", s->object_size); | 3671 | "dma-kmalloc-%d", s->objsize); |
3760 | 3672 | ||
3761 | BUG_ON(!name); | 3673 | BUG_ON(!name); |
3762 | kmalloc_dma_caches[i] = create_kmalloc_cache(name, | 3674 | kmalloc_dma_caches[i] = create_kmalloc_cache(name, |
3763 | s->object_size, SLAB_CACHE_DMA); | 3675 | s->objsize, SLAB_CACHE_DMA); |
3764 | } | 3676 | } |
3765 | } | 3677 | } |
3766 | #endif | 3678 | #endif |
@@ -3796,7 +3708,7 @@ static int slab_unmergeable(struct kmem_cache *s) | |||
3796 | return 0; | 3708 | return 0; |
3797 | } | 3709 | } |
3798 | 3710 | ||
3799 | static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size, | 3711 | static struct kmem_cache *find_mergeable(size_t size, |
3800 | size_t align, unsigned long flags, const char *name, | 3712 | size_t align, unsigned long flags, const char *name, |
3801 | void (*ctor)(void *)) | 3713 | void (*ctor)(void *)) |
3802 | { | 3714 | { |
@@ -3832,61 +3744,70 @@ static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size, | |||
3832 | if (s->size - size >= sizeof(void *)) | 3744 | if (s->size - size >= sizeof(void *)) |
3833 | continue; | 3745 | continue; |
3834 | 3746 | ||
3835 | if (!cache_match_memcg(s, memcg)) | ||
3836 | continue; | ||
3837 | |||
3838 | return s; | 3747 | return s; |
3839 | } | 3748 | } |
3840 | return NULL; | 3749 | return NULL; |
3841 | } | 3750 | } |
3842 | 3751 | ||
3843 | struct kmem_cache * | 3752 | struct kmem_cache *kmem_cache_create(const char *name, size_t size, |
3844 | __kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size, | 3753 | size_t align, unsigned long flags, void (*ctor)(void *)) |
3845 | size_t align, unsigned long flags, void (*ctor)(void *)) | ||
3846 | { | 3754 | { |
3847 | struct kmem_cache *s; | 3755 | struct kmem_cache *s; |
3756 | char *n; | ||
3757 | |||
3758 | if (WARN_ON(!name)) | ||
3759 | return NULL; | ||
3848 | 3760 | ||
3849 | s = find_mergeable(memcg, size, align, flags, name, ctor); | 3761 | down_write(&slub_lock); |
3762 | s = find_mergeable(size, align, flags, name, ctor); | ||
3850 | if (s) { | 3763 | if (s) { |
3851 | s->refcount++; | 3764 | s->refcount++; |
3852 | /* | 3765 | /* |
3853 | * Adjust the object sizes so that we clear | 3766 | * Adjust the object sizes so that we clear |
3854 | * the complete object on kzalloc. | 3767 | * the complete object on kzalloc. |
3855 | */ | 3768 | */ |
3856 | s->object_size = max(s->object_size, (int)size); | 3769 | s->objsize = max(s->objsize, (int)size); |
3857 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); | 3770 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); |
3858 | 3771 | ||
3859 | if (sysfs_slab_alias(s, name)) { | 3772 | if (sysfs_slab_alias(s, name)) { |
3860 | s->refcount--; | 3773 | s->refcount--; |
3861 | s = NULL; | 3774 | goto err; |
3862 | } | 3775 | } |
3776 | up_write(&slub_lock); | ||
3777 | return s; | ||
3863 | } | 3778 | } |
3864 | 3779 | ||
3865 | return s; | 3780 | n = kstrdup(name, GFP_KERNEL); |
3866 | } | 3781 | if (!n) |
3782 | goto err; | ||
3867 | 3783 | ||
3868 | int __kmem_cache_create(struct kmem_cache *s, unsigned long flags) | 3784 | s = kmalloc(kmem_size, GFP_KERNEL); |
3869 | { | 3785 | if (s) { |
3870 | int err; | 3786 | if (kmem_cache_open(s, n, |
3871 | 3787 | size, align, flags, ctor)) { | |
3872 | err = kmem_cache_open(s, flags); | 3788 | list_add(&s->list, &slab_caches); |
3873 | if (err) | 3789 | if (sysfs_slab_add(s)) { |
3874 | return err; | 3790 | list_del(&s->list); |
3875 | 3791 | kfree(n); | |
3876 | /* Mutex is not taken during early boot */ | 3792 | kfree(s); |
3877 | if (slab_state <= UP) | 3793 | goto err; |
3878 | return 0; | 3794 | } |
3879 | 3795 | up_write(&slub_lock); | |
3880 | memcg_propagate_slab_attrs(s); | 3796 | return s; |
3881 | mutex_unlock(&slab_mutex); | 3797 | } |
3882 | err = sysfs_slab_add(s); | 3798 | kfree(n); |
3883 | mutex_lock(&slab_mutex); | 3799 | kfree(s); |
3884 | 3800 | } | |
3885 | if (err) | 3801 | err: |
3886 | kmem_cache_close(s); | 3802 | up_write(&slub_lock); |
3887 | 3803 | ||
3888 | return err; | 3804 | if (flags & SLAB_PANIC) |
3805 | panic("Cannot create slabcache %s\n", name); | ||
3806 | else | ||
3807 | s = NULL; | ||
3808 | return s; | ||
3889 | } | 3809 | } |
3810 | EXPORT_SYMBOL(kmem_cache_create); | ||
3890 | 3811 | ||
3891 | #ifdef CONFIG_SMP | 3812 | #ifdef CONFIG_SMP |
3892 | /* | 3813 | /* |
@@ -3905,13 +3826,13 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, | |||
3905 | case CPU_UP_CANCELED_FROZEN: | 3826 | case CPU_UP_CANCELED_FROZEN: |
3906 | case CPU_DEAD: | 3827 | case CPU_DEAD: |
3907 | case CPU_DEAD_FROZEN: | 3828 | case CPU_DEAD_FROZEN: |
3908 | mutex_lock(&slab_mutex); | 3829 | down_read(&slub_lock); |
3909 | list_for_each_entry(s, &slab_caches, list) { | 3830 | list_for_each_entry(s, &slab_caches, list) { |
3910 | local_irq_save(flags); | 3831 | local_irq_save(flags); |
3911 | __flush_cpu_slab(s, cpu); | 3832 | __flush_cpu_slab(s, cpu); |
3912 | local_irq_restore(flags); | 3833 | local_irq_restore(flags); |
3913 | } | 3834 | } |
3914 | mutex_unlock(&slab_mutex); | 3835 | up_read(&slub_lock); |
3915 | break; | 3836 | break; |
3916 | default: | 3837 | default: |
3917 | break; | 3838 | break; |
@@ -3938,7 +3859,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) | |||
3938 | if (unlikely(ZERO_OR_NULL_PTR(s))) | 3859 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
3939 | return s; | 3860 | return s; |
3940 | 3861 | ||
3941 | ret = slab_alloc(s, gfpflags, caller); | 3862 | ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller); |
3942 | 3863 | ||
3943 | /* Honor the call site pointer we received. */ | 3864 | /* Honor the call site pointer we received. */ |
3944 | trace_kmalloc(caller, ret, size, s->size, gfpflags); | 3865 | trace_kmalloc(caller, ret, size, s->size, gfpflags); |
@@ -3968,7 +3889,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, | |||
3968 | if (unlikely(ZERO_OR_NULL_PTR(s))) | 3889 | if (unlikely(ZERO_OR_NULL_PTR(s))) |
3969 | return s; | 3890 | return s; |
3970 | 3891 | ||
3971 | ret = slab_alloc_node(s, gfpflags, node, caller); | 3892 | ret = slab_alloc(s, gfpflags, node, caller); |
3972 | 3893 | ||
3973 | /* Honor the call site pointer we received. */ | 3894 | /* Honor the call site pointer we received. */ |
3974 | trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); | 3895 | trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); |
@@ -4403,32 +4324,22 @@ static ssize_t show_slab_objects(struct kmem_cache *s, | |||
4403 | 4324 | ||
4404 | for_each_possible_cpu(cpu) { | 4325 | for_each_possible_cpu(cpu) { |
4405 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); | 4326 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); |
4406 | int node; | ||
4407 | struct page *page; | ||
4408 | 4327 | ||
4409 | page = ACCESS_ONCE(c->page); | 4328 | if (!c || c->node < 0) |
4410 | if (!page) | ||
4411 | continue; | 4329 | continue; |
4412 | 4330 | ||
4413 | node = page_to_nid(page); | 4331 | if (c->page) { |
4414 | if (flags & SO_TOTAL) | 4332 | if (flags & SO_TOTAL) |
4415 | x = page->objects; | 4333 | x = c->page->objects; |
4416 | else if (flags & SO_OBJECTS) | 4334 | else if (flags & SO_OBJECTS) |
4417 | x = page->inuse; | 4335 | x = c->page->inuse; |
4418 | else | 4336 | else |
4419 | x = 1; | 4337 | x = 1; |
4420 | |||
4421 | total += x; | ||
4422 | nodes[node] += x; | ||
4423 | 4338 | ||
4424 | page = ACCESS_ONCE(c->partial); | ||
4425 | if (page) { | ||
4426 | x = page->pobjects; | ||
4427 | total += x; | 4339 | total += x; |
4428 | nodes[node] += x; | 4340 | nodes[c->node] += x; |
4429 | } | 4341 | } |
4430 | 4342 | per_cpu[c->node]++; | |
4431 | per_cpu[node]++; | ||
4432 | } | 4343 | } |
4433 | } | 4344 | } |
4434 | 4345 | ||
@@ -4506,12 +4417,11 @@ struct slab_attribute { | |||
4506 | }; | 4417 | }; |
4507 | 4418 | ||
4508 | #define SLAB_ATTR_RO(_name) \ | 4419 | #define SLAB_ATTR_RO(_name) \ |
4509 | static struct slab_attribute _name##_attr = \ | 4420 | static struct slab_attribute _name##_attr = __ATTR_RO(_name) |
4510 | __ATTR(_name, 0400, _name##_show, NULL) | ||
4511 | 4421 | ||
4512 | #define SLAB_ATTR(_name) \ | 4422 | #define SLAB_ATTR(_name) \ |
4513 | static struct slab_attribute _name##_attr = \ | 4423 | static struct slab_attribute _name##_attr = \ |
4514 | __ATTR(_name, 0600, _name##_show, _name##_store) | 4424 | __ATTR(_name, 0644, _name##_show, _name##_store) |
4515 | 4425 | ||
4516 | static ssize_t slab_size_show(struct kmem_cache *s, char *buf) | 4426 | static ssize_t slab_size_show(struct kmem_cache *s, char *buf) |
4517 | { | 4427 | { |
@@ -4527,7 +4437,7 @@ SLAB_ATTR_RO(align); | |||
4527 | 4437 | ||
4528 | static ssize_t object_size_show(struct kmem_cache *s, char *buf) | 4438 | static ssize_t object_size_show(struct kmem_cache *s, char *buf) |
4529 | { | 4439 | { |
4530 | return sprintf(buf, "%d\n", s->object_size); | 4440 | return sprintf(buf, "%d\n", s->objsize); |
4531 | } | 4441 | } |
4532 | SLAB_ATTR_RO(object_size); | 4442 | SLAB_ATTR_RO(object_size); |
4533 | 4443 | ||
@@ -4580,29 +4490,6 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf, | |||
4580 | } | 4490 | } |
4581 | SLAB_ATTR(min_partial); | 4491 | SLAB_ATTR(min_partial); |
4582 | 4492 | ||
4583 | static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf) | ||
4584 | { | ||
4585 | return sprintf(buf, "%u\n", s->cpu_partial); | ||
4586 | } | ||
4587 | |||
4588 | static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, | ||
4589 | size_t length) | ||
4590 | { | ||
4591 | unsigned long objects; | ||
4592 | int err; | ||
4593 | |||
4594 | err = strict_strtoul(buf, 10, &objects); | ||
4595 | if (err) | ||
4596 | return err; | ||
4597 | if (objects && kmem_cache_debug(s)) | ||
4598 | return -EINVAL; | ||
4599 | |||
4600 | s->cpu_partial = objects; | ||
4601 | flush_all(s); | ||
4602 | return length; | ||
4603 | } | ||
4604 | SLAB_ATTR(cpu_partial); | ||
4605 | |||
4606 | static ssize_t ctor_show(struct kmem_cache *s, char *buf) | 4493 | static ssize_t ctor_show(struct kmem_cache *s, char *buf) |
4607 | { | 4494 | { |
4608 | if (!s->ctor) | 4495 | if (!s->ctor) |
@@ -4641,37 +4528,6 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf) | |||
4641 | } | 4528 | } |
4642 | SLAB_ATTR_RO(objects_partial); | 4529 | SLAB_ATTR_RO(objects_partial); |
4643 | 4530 | ||
4644 | static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf) | ||
4645 | { | ||
4646 | int objects = 0; | ||
4647 | int pages = 0; | ||
4648 | int cpu; | ||
4649 | int len; | ||
4650 | |||
4651 | for_each_online_cpu(cpu) { | ||
4652 | struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial; | ||
4653 | |||
4654 | if (page) { | ||
4655 | pages += page->pages; | ||
4656 | objects += page->pobjects; | ||
4657 | } | ||
4658 | } | ||
4659 | |||
4660 | len = sprintf(buf, "%d(%d)", objects, pages); | ||
4661 | |||
4662 | #ifdef CONFIG_SMP | ||
4663 | for_each_online_cpu(cpu) { | ||
4664 | struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial; | ||
4665 | |||
4666 | if (page && len < PAGE_SIZE - 20) | ||
4667 | len += sprintf(buf + len, " C%d=%d(%d)", cpu, | ||
4668 | page->pobjects, page->pages); | ||
4669 | } | ||
4670 | #endif | ||
4671 | return len + sprintf(buf + len, "\n"); | ||
4672 | } | ||
4673 | SLAB_ATTR_RO(slabs_cpu_partial); | ||
4674 | |||
4675 | static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) | 4531 | static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) |
4676 | { | 4532 | { |
4677 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); | 4533 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); |
@@ -4994,10 +4850,6 @@ STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass); | |||
4994 | STAT_ATTR(ORDER_FALLBACK, order_fallback); | 4850 | STAT_ATTR(ORDER_FALLBACK, order_fallback); |
4995 | STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail); | 4851 | STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail); |
4996 | STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail); | 4852 | STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail); |
4997 | STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc); | ||
4998 | STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free); | ||
4999 | STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node); | ||
5000 | STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain); | ||
5001 | #endif | 4853 | #endif |
5002 | 4854 | ||
5003 | static struct attribute *slab_attrs[] = { | 4855 | static struct attribute *slab_attrs[] = { |
@@ -5006,7 +4858,6 @@ static struct attribute *slab_attrs[] = { | |||
5006 | &objs_per_slab_attr.attr, | 4858 | &objs_per_slab_attr.attr, |
5007 | &order_attr.attr, | 4859 | &order_attr.attr, |
5008 | &min_partial_attr.attr, | 4860 | &min_partial_attr.attr, |
5009 | &cpu_partial_attr.attr, | ||
5010 | &objects_attr.attr, | 4861 | &objects_attr.attr, |
5011 | &objects_partial_attr.attr, | 4862 | &objects_partial_attr.attr, |
5012 | &partial_attr.attr, | 4863 | &partial_attr.attr, |
@@ -5019,7 +4870,6 @@ static struct attribute *slab_attrs[] = { | |||
5019 | &destroy_by_rcu_attr.attr, | 4870 | &destroy_by_rcu_attr.attr, |
5020 | &shrink_attr.attr, | 4871 | &shrink_attr.attr, |
5021 | &reserved_attr.attr, | 4872 | &reserved_attr.attr, |
5022 | &slabs_cpu_partial_attr.attr, | ||
5023 | #ifdef CONFIG_SLUB_DEBUG | 4873 | #ifdef CONFIG_SLUB_DEBUG |
5024 | &total_objects_attr.attr, | 4874 | &total_objects_attr.attr, |
5025 | &slabs_attr.attr, | 4875 | &slabs_attr.attr, |
@@ -5061,10 +4911,6 @@ static struct attribute *slab_attrs[] = { | |||
5061 | &order_fallback_attr.attr, | 4911 | &order_fallback_attr.attr, |
5062 | &cmpxchg_double_fail_attr.attr, | 4912 | &cmpxchg_double_fail_attr.attr, |
5063 | &cmpxchg_double_cpu_fail_attr.attr, | 4913 | &cmpxchg_double_cpu_fail_attr.attr, |
5064 | &cpu_partial_alloc_attr.attr, | ||
5065 | &cpu_partial_free_attr.attr, | ||
5066 | &cpu_partial_node_attr.attr, | ||
5067 | &cpu_partial_drain_attr.attr, | ||
5068 | #endif | 4914 | #endif |
5069 | #ifdef CONFIG_FAILSLAB | 4915 | #ifdef CONFIG_FAILSLAB |
5070 | &failslab_attr.attr, | 4916 | &failslab_attr.attr, |
@@ -5111,93 +4957,16 @@ static ssize_t slab_attr_store(struct kobject *kobj, | |||
5111 | return -EIO; | 4957 | return -EIO; |
5112 | 4958 | ||
5113 | err = attribute->store(s, buf, len); | 4959 | err = attribute->store(s, buf, len); |
5114 | #ifdef CONFIG_MEMCG_KMEM | ||
5115 | if (slab_state >= FULL && err >= 0 && is_root_cache(s)) { | ||
5116 | int i; | ||
5117 | |||
5118 | mutex_lock(&slab_mutex); | ||
5119 | if (s->max_attr_size < len) | ||
5120 | s->max_attr_size = len; | ||
5121 | 4960 | ||
5122 | /* | ||
5123 | * This is a best effort propagation, so this function's return | ||
5124 | * value will be determined by the parent cache only. This is | ||
5125 | * basically because not all attributes will have a well | ||
5126 | * defined semantics for rollbacks - most of the actions will | ||
5127 | * have permanent effects. | ||
5128 | * | ||
5129 | * Returning the error value of any of the children that fail | ||
5130 | * is not 100 % defined, in the sense that users seeing the | ||
5131 | * error code won't be able to know anything about the state of | ||
5132 | * the cache. | ||
5133 | * | ||
5134 | * Only returning the error code for the parent cache at least | ||
5135 | * has well defined semantics. The cache being written to | ||
5136 | * directly either failed or succeeded, in which case we loop | ||
5137 | * through the descendants with best-effort propagation. | ||
5138 | */ | ||
5139 | for_each_memcg_cache_index(i) { | ||
5140 | struct kmem_cache *c = cache_from_memcg(s, i); | ||
5141 | if (c) | ||
5142 | attribute->store(c, buf, len); | ||
5143 | } | ||
5144 | mutex_unlock(&slab_mutex); | ||
5145 | } | ||
5146 | #endif | ||
5147 | return err; | 4961 | return err; |
5148 | } | 4962 | } |
5149 | 4963 | ||
5150 | static void memcg_propagate_slab_attrs(struct kmem_cache *s) | 4964 | static void kmem_cache_release(struct kobject *kobj) |
5151 | { | 4965 | { |
5152 | #ifdef CONFIG_MEMCG_KMEM | 4966 | struct kmem_cache *s = to_slab(kobj); |
5153 | int i; | ||
5154 | char *buffer = NULL; | ||
5155 | |||
5156 | if (!is_root_cache(s)) | ||
5157 | return; | ||
5158 | |||
5159 | /* | ||
5160 | * This mean this cache had no attribute written. Therefore, no point | ||
5161 | * in copying default values around | ||
5162 | */ | ||
5163 | if (!s->max_attr_size) | ||
5164 | return; | ||
5165 | 4967 | ||
5166 | for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) { | 4968 | kfree(s->name); |
5167 | char mbuf[64]; | 4969 | kfree(s); |
5168 | char *buf; | ||
5169 | struct slab_attribute *attr = to_slab_attr(slab_attrs[i]); | ||
5170 | |||
5171 | if (!attr || !attr->store || !attr->show) | ||
5172 | continue; | ||
5173 | |||
5174 | /* | ||
5175 | * It is really bad that we have to allocate here, so we will | ||
5176 | * do it only as a fallback. If we actually allocate, though, | ||
5177 | * we can just use the allocated buffer until the end. | ||
5178 | * | ||
5179 | * Most of the slub attributes will tend to be very small in | ||
5180 | * size, but sysfs allows buffers up to a page, so they can | ||
5181 | * theoretically happen. | ||
5182 | */ | ||
5183 | if (buffer) | ||
5184 | buf = buffer; | ||
5185 | else if (s->max_attr_size < ARRAY_SIZE(mbuf)) | ||
5186 | buf = mbuf; | ||
5187 | else { | ||
5188 | buffer = (char *) get_zeroed_page(GFP_KERNEL); | ||
5189 | if (WARN_ON(!buffer)) | ||
5190 | continue; | ||
5191 | buf = buffer; | ||
5192 | } | ||
5193 | |||
5194 | attr->show(s->memcg_params->root_cache, buf); | ||
5195 | attr->store(s, buf, strlen(buf)); | ||
5196 | } | ||
5197 | |||
5198 | if (buffer) | ||
5199 | free_page((unsigned long)buffer); | ||
5200 | #endif | ||
5201 | } | 4970 | } |
5202 | 4971 | ||
5203 | static const struct sysfs_ops slab_sysfs_ops = { | 4972 | static const struct sysfs_ops slab_sysfs_ops = { |
@@ -5207,6 +4976,7 @@ static const struct sysfs_ops slab_sysfs_ops = { | |||
5207 | 4976 | ||
5208 | static struct kobj_type slab_ktype = { | 4977 | static struct kobj_type slab_ktype = { |
5209 | .sysfs_ops = &slab_sysfs_ops, | 4978 | .sysfs_ops = &slab_sysfs_ops, |
4979 | .release = kmem_cache_release | ||
5210 | }; | 4980 | }; |
5211 | 4981 | ||
5212 | static int uevent_filter(struct kset *kset, struct kobject *kobj) | 4982 | static int uevent_filter(struct kset *kset, struct kobject *kobj) |
@@ -5256,12 +5026,6 @@ static char *create_unique_id(struct kmem_cache *s) | |||
5256 | if (p != name + 1) | 5026 | if (p != name + 1) |
5257 | *p++ = '-'; | 5027 | *p++ = '-'; |
5258 | p += sprintf(p, "%07d", s->size); | 5028 | p += sprintf(p, "%07d", s->size); |
5259 | |||
5260 | #ifdef CONFIG_MEMCG_KMEM | ||
5261 | if (!is_root_cache(s)) | ||
5262 | p += sprintf(p, "-%08d", memcg_cache_id(s->memcg_params->memcg)); | ||
5263 | #endif | ||
5264 | |||
5265 | BUG_ON(p > name + ID_STR_LENGTH - 1); | 5029 | BUG_ON(p > name + ID_STR_LENGTH - 1); |
5266 | return name; | 5030 | return name; |
5267 | } | 5031 | } |
@@ -5270,8 +5034,13 @@ static int sysfs_slab_add(struct kmem_cache *s) | |||
5270 | { | 5034 | { |
5271 | int err; | 5035 | int err; |
5272 | const char *name; | 5036 | const char *name; |
5273 | int unmergeable = slab_unmergeable(s); | 5037 | int unmergeable; |
5038 | |||
5039 | if (slab_state < SYSFS) | ||
5040 | /* Defer until later */ | ||
5041 | return 0; | ||
5274 | 5042 | ||
5043 | unmergeable = slab_unmergeable(s); | ||
5275 | if (unmergeable) { | 5044 | if (unmergeable) { |
5276 | /* | 5045 | /* |
5277 | * Slabcache can never be merged so we can use the name proper. | 5046 | * Slabcache can never be merged so we can use the name proper. |
@@ -5312,7 +5081,7 @@ static int sysfs_slab_add(struct kmem_cache *s) | |||
5312 | 5081 | ||
5313 | static void sysfs_slab_remove(struct kmem_cache *s) | 5082 | static void sysfs_slab_remove(struct kmem_cache *s) |
5314 | { | 5083 | { |
5315 | if (slab_state < FULL) | 5084 | if (slab_state < SYSFS) |
5316 | /* | 5085 | /* |
5317 | * Sysfs has not been setup yet so no need to remove the | 5086 | * Sysfs has not been setup yet so no need to remove the |
5318 | * cache from sysfs. | 5087 | * cache from sysfs. |
@@ -5340,7 +5109,7 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name) | |||
5340 | { | 5109 | { |
5341 | struct saved_alias *al; | 5110 | struct saved_alias *al; |
5342 | 5111 | ||
5343 | if (slab_state == FULL) { | 5112 | if (slab_state == SYSFS) { |
5344 | /* | 5113 | /* |
5345 | * If we have a leftover link then remove it. | 5114 | * If we have a leftover link then remove it. |
5346 | */ | 5115 | */ |
@@ -5364,16 +5133,16 @@ static int __init slab_sysfs_init(void) | |||
5364 | struct kmem_cache *s; | 5133 | struct kmem_cache *s; |
5365 | int err; | 5134 | int err; |
5366 | 5135 | ||
5367 | mutex_lock(&slab_mutex); | 5136 | down_write(&slub_lock); |
5368 | 5137 | ||
5369 | slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); | 5138 | slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); |
5370 | if (!slab_kset) { | 5139 | if (!slab_kset) { |
5371 | mutex_unlock(&slab_mutex); | 5140 | up_write(&slub_lock); |
5372 | printk(KERN_ERR "Cannot register slab subsystem.\n"); | 5141 | printk(KERN_ERR "Cannot register slab subsystem.\n"); |
5373 | return -ENOSYS; | 5142 | return -ENOSYS; |
5374 | } | 5143 | } |
5375 | 5144 | ||
5376 | slab_state = FULL; | 5145 | slab_state = SYSFS; |
5377 | 5146 | ||
5378 | list_for_each_entry(s, &slab_caches, list) { | 5147 | list_for_each_entry(s, &slab_caches, list) { |
5379 | err = sysfs_slab_add(s); | 5148 | err = sysfs_slab_add(s); |
@@ -5389,11 +5158,11 @@ static int __init slab_sysfs_init(void) | |||
5389 | err = sysfs_slab_alias(al->s, al->name); | 5158 | err = sysfs_slab_alias(al->s, al->name); |
5390 | if (err) | 5159 | if (err) |
5391 | printk(KERN_ERR "SLUB: Unable to add boot slab alias" | 5160 | printk(KERN_ERR "SLUB: Unable to add boot slab alias" |
5392 | " %s to sysfs\n", al->name); | 5161 | " %s to sysfs\n", s->name); |
5393 | kfree(al); | 5162 | kfree(al); |
5394 | } | 5163 | } |
5395 | 5164 | ||
5396 | mutex_unlock(&slab_mutex); | 5165 | up_write(&slub_lock); |
5397 | resiliency_test(); | 5166 | resiliency_test(); |
5398 | return 0; | 5167 | return 0; |
5399 | } | 5168 | } |
@@ -5405,14 +5174,49 @@ __initcall(slab_sysfs_init); | |||
5405 | * The /proc/slabinfo ABI | 5174 | * The /proc/slabinfo ABI |
5406 | */ | 5175 | */ |
5407 | #ifdef CONFIG_SLABINFO | 5176 | #ifdef CONFIG_SLABINFO |
5408 | void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo) | 5177 | static void print_slabinfo_header(struct seq_file *m) |
5178 | { | ||
5179 | seq_puts(m, "slabinfo - version: 2.1\n"); | ||
5180 | seq_puts(m, "# name <active_objs> <num_objs> <objsize> " | ||
5181 | "<objperslab> <pagesperslab>"); | ||
5182 | seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); | ||
5183 | seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); | ||
5184 | seq_putc(m, '\n'); | ||
5185 | } | ||
5186 | |||
5187 | static void *s_start(struct seq_file *m, loff_t *pos) | ||
5188 | { | ||
5189 | loff_t n = *pos; | ||
5190 | |||
5191 | down_read(&slub_lock); | ||
5192 | if (!n) | ||
5193 | print_slabinfo_header(m); | ||
5194 | |||
5195 | return seq_list_start(&slab_caches, *pos); | ||
5196 | } | ||
5197 | |||
5198 | static void *s_next(struct seq_file *m, void *p, loff_t *pos) | ||
5199 | { | ||
5200 | return seq_list_next(p, &slab_caches, pos); | ||
5201 | } | ||
5202 | |||
5203 | static void s_stop(struct seq_file *m, void *p) | ||
5204 | { | ||
5205 | up_read(&slub_lock); | ||
5206 | } | ||
5207 | |||
5208 | static int s_show(struct seq_file *m, void *p) | ||
5409 | { | 5209 | { |
5410 | unsigned long nr_partials = 0; | 5210 | unsigned long nr_partials = 0; |
5411 | unsigned long nr_slabs = 0; | 5211 | unsigned long nr_slabs = 0; |
5212 | unsigned long nr_inuse = 0; | ||
5412 | unsigned long nr_objs = 0; | 5213 | unsigned long nr_objs = 0; |
5413 | unsigned long nr_free = 0; | 5214 | unsigned long nr_free = 0; |
5215 | struct kmem_cache *s; | ||
5414 | int node; | 5216 | int node; |
5415 | 5217 | ||
5218 | s = list_entry(p, struct kmem_cache, list); | ||
5219 | |||
5416 | for_each_online_node(node) { | 5220 | for_each_online_node(node) { |
5417 | struct kmem_cache_node *n = get_node(s, node); | 5221 | struct kmem_cache_node *n = get_node(s, node); |
5418 | 5222 | ||
@@ -5425,21 +5229,41 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo) | |||
5425 | nr_free += count_partial(n, count_free); | 5229 | nr_free += count_partial(n, count_free); |
5426 | } | 5230 | } |
5427 | 5231 | ||
5428 | sinfo->active_objs = nr_objs - nr_free; | 5232 | nr_inuse = nr_objs - nr_free; |
5429 | sinfo->num_objs = nr_objs; | 5233 | |
5430 | sinfo->active_slabs = nr_slabs; | 5234 | seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse, |
5431 | sinfo->num_slabs = nr_slabs; | 5235 | nr_objs, s->size, oo_objects(s->oo), |
5432 | sinfo->objects_per_slab = oo_objects(s->oo); | 5236 | (1 << oo_order(s->oo))); |
5433 | sinfo->cache_order = oo_order(s->oo); | 5237 | seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0); |
5238 | seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs, | ||
5239 | 0UL); | ||
5240 | seq_putc(m, '\n'); | ||
5241 | return 0; | ||
5434 | } | 5242 | } |
5435 | 5243 | ||
5436 | void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s) | 5244 | static const struct seq_operations slabinfo_op = { |
5245 | .start = s_start, | ||
5246 | .next = s_next, | ||
5247 | .stop = s_stop, | ||
5248 | .show = s_show, | ||
5249 | }; | ||
5250 | |||
5251 | static int slabinfo_open(struct inode *inode, struct file *file) | ||
5437 | { | 5252 | { |
5253 | return seq_open(file, &slabinfo_op); | ||
5438 | } | 5254 | } |
5439 | 5255 | ||
5440 | ssize_t slabinfo_write(struct file *file, const char __user *buffer, | 5256 | static const struct file_operations proc_slabinfo_operations = { |
5441 | size_t count, loff_t *ppos) | 5257 | .open = slabinfo_open, |
5258 | .read = seq_read, | ||
5259 | .llseek = seq_lseek, | ||
5260 | .release = seq_release, | ||
5261 | }; | ||
5262 | |||
5263 | static int __init slab_proc_init(void) | ||
5442 | { | 5264 | { |
5443 | return -EIO; | 5265 | proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations); |
5266 | return 0; | ||
5444 | } | 5267 | } |
5268 | module_init(slab_proc_init); | ||
5445 | #endif /* CONFIG_SLABINFO */ | 5269 | #endif /* CONFIG_SLABINFO */ |