author    Linus Torvalds <torvalds@linux-foundation.org>  2008-04-28 17:08:56 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2008-04-28 17:08:56 -0400
commit    e97e386b126c2d60b8da61ce1e4964b41b3d1514 (patch)
tree      7e04b7f735004330777200c6742568fc130ff893
parent    d9dedc13851f9cbd568fbc631a17b0be83404957 (diff)
parent    c124f5b54f879e5870befcc076addbd5d614663f (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
  slub: pack objects denser
  slub: Calculate min_objects based on number of processors.
  slub: Drop DEFAULT_MAX_ORDER / DEFAULT_MIN_OBJECTS
  slub: Simplify any_slab_object checks
  slub: Make the order configurable for each slab cache
  slub: Drop fallback to page allocator method
  slub: Fallback to minimal order during slab page allocation
  slub: Update statistics handling for variable order slabs
  slub: Add kmem_cache_order_objects struct
  slub: for_each_object must be passed the number of objects in a slab
  slub: Store max number of objects in the page struct.
  slub: Dump list of objects not freed on kmem_cache_close()
  slub: free_list() cleanup
  slub: improve kmem_cache_destroy() error message
  slob: fix bug - when slob allocates "struct kmem_cache", it does not force alignment.
-rw-r--r--  Documentation/vm/slabinfo.c |  27
-rw-r--r--  include/linux/mm_types.h    |   5
-rw-r--r--  include/linux/slub_def.h    |  16
-rw-r--r--  mm/slob.c                   |   3
-rw-r--r--  mm/slub.c                   | 481
5 files changed, 318 insertions, 214 deletions
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index 22d7e3e4d60c..d3ce295bffac 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -31,7 +31,7 @@ struct slabinfo {
31 int hwcache_align, object_size, objs_per_slab; 31 int hwcache_align, object_size, objs_per_slab;
32 int sanity_checks, slab_size, store_user, trace; 32 int sanity_checks, slab_size, store_user, trace;
33 int order, poison, reclaim_account, red_zone; 33 int order, poison, reclaim_account, red_zone;
34 unsigned long partial, objects, slabs; 34 unsigned long partial, objects, slabs, objects_partial, objects_total;
35 unsigned long alloc_fastpath, alloc_slowpath; 35 unsigned long alloc_fastpath, alloc_slowpath;
36 unsigned long free_fastpath, free_slowpath; 36 unsigned long free_fastpath, free_slowpath;
37 unsigned long free_frozen, free_add_partial, free_remove_partial; 37 unsigned long free_frozen, free_add_partial, free_remove_partial;
@@ -540,7 +540,8 @@ void slabcache(struct slabinfo *s)
540 return; 540 return;
541 541
542 store_size(size_str, slab_size(s)); 542 store_size(size_str, slab_size(s));
543 snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs, s->partial, s->cpu_slabs); 543 snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs,
544 s->partial, s->cpu_slabs);
544 545
545 if (!line++) 546 if (!line++)
546 first_line(); 547 first_line();
@@ -776,7 +777,6 @@ void totals(void)
776 unsigned long used; 777 unsigned long used;
777 unsigned long long wasted; 778 unsigned long long wasted;
778 unsigned long long objwaste; 779 unsigned long long objwaste;
779 long long objects_in_partial_slabs;
780 unsigned long percentage_partial_slabs; 780 unsigned long percentage_partial_slabs;
781 unsigned long percentage_partial_objs; 781 unsigned long percentage_partial_objs;
782 782
@@ -790,18 +790,11 @@ void totals(void)
790 wasted = size - used; 790 wasted = size - used;
791 objwaste = s->slab_size - s->object_size; 791 objwaste = s->slab_size - s->object_size;
792 792
793 objects_in_partial_slabs = s->objects -
794 (s->slabs - s->partial - s ->cpu_slabs) *
795 s->objs_per_slab;
796
797 if (objects_in_partial_slabs < 0)
798 objects_in_partial_slabs = 0;
799
800 percentage_partial_slabs = s->partial * 100 / s->slabs; 793 percentage_partial_slabs = s->partial * 100 / s->slabs;
801 if (percentage_partial_slabs > 100) 794 if (percentage_partial_slabs > 100)
802 percentage_partial_slabs = 100; 795 percentage_partial_slabs = 100;
803 796
804 percentage_partial_objs = objects_in_partial_slabs * 100 797 percentage_partial_objs = s->objects_partial * 100
805 / s->objects; 798 / s->objects;
806 799
807 if (percentage_partial_objs > 100) 800 if (percentage_partial_objs > 100)
@@ -823,8 +816,8 @@ void totals(void)
823 min_objects = s->objects; 816 min_objects = s->objects;
824 if (used < min_used) 817 if (used < min_used)
825 min_used = used; 818 min_used = used;
826 if (objects_in_partial_slabs < min_partobj) 819 if (s->objects_partial < min_partobj)
827 min_partobj = objects_in_partial_slabs; 820 min_partobj = s->objects_partial;
828 if (percentage_partial_slabs < min_ppart) 821 if (percentage_partial_slabs < min_ppart)
829 min_ppart = percentage_partial_slabs; 822 min_ppart = percentage_partial_slabs;
830 if (percentage_partial_objs < min_ppartobj) 823 if (percentage_partial_objs < min_ppartobj)
@@ -848,8 +841,8 @@ void totals(void)
848 max_objects = s->objects; 841 max_objects = s->objects;
849 if (used > max_used) 842 if (used > max_used)
850 max_used = used; 843 max_used = used;
851 if (objects_in_partial_slabs > max_partobj) 844 if (s->objects_partial > max_partobj)
852 max_partobj = objects_in_partial_slabs; 845 max_partobj = s->objects_partial;
853 if (percentage_partial_slabs > max_ppart) 846 if (percentage_partial_slabs > max_ppart)
854 max_ppart = percentage_partial_slabs; 847 max_ppart = percentage_partial_slabs;
855 if (percentage_partial_objs > max_ppartobj) 848 if (percentage_partial_objs > max_ppartobj)
@@ -864,7 +857,7 @@ void totals(void)
864 857
865 total_objects += s->objects; 858 total_objects += s->objects;
866 total_used += used; 859 total_used += used;
867 total_partobj += objects_in_partial_slabs; 860 total_partobj += s->objects_partial;
868 total_ppart += percentage_partial_slabs; 861 total_ppart += percentage_partial_slabs;
869 total_ppartobj += percentage_partial_objs; 862 total_ppartobj += percentage_partial_objs;
870 863
@@ -1160,6 +1153,8 @@ void read_slab_dir(void)
1160 slab->hwcache_align = get_obj("hwcache_align"); 1153 slab->hwcache_align = get_obj("hwcache_align");
1161 slab->object_size = get_obj("object_size"); 1154 slab->object_size = get_obj("object_size");
1162 slab->objects = get_obj("objects"); 1155 slab->objects = get_obj("objects");
1156 slab->objects_partial = get_obj("objects_partial");
1157 slab->objects_total = get_obj("objects_total");
1163 slab->objs_per_slab = get_obj("objs_per_slab"); 1158 slab->objs_per_slab = get_obj("objs_per_slab");
1164 slab->order = get_obj("order"); 1159 slab->order = get_obj("order");
1165 slab->partial = get_obj("partial"); 1160 slab->partial = get_obj("partial");
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 29adaa781cb6..e2bae8dde35a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -42,7 +42,10 @@ struct page {
42 * to show when page is mapped 42 * to show when page is mapped
43 * & limit reverse map searches. 43 * & limit reverse map searches.
44 */ 44 */
45 unsigned int inuse; /* SLUB: Nr of objects */ 45 struct { /* SLUB */
46 u16 inuse;
47 u16 objects;
48 };
46 }; 49 };
47 union { 50 union {
48 struct { 51 struct {
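The mm_types.h change above splits the old "unsigned int inuse" slot into two 16-bit counters, so each slab page records both how many objects it holds and how many are in use without growing struct page. A minimal userspace sketch of the layout (hypothetical names, assuming a 32-bit unsigned int); the u16 objects field is also why later slub.c hunks clamp object counts at 65535:

#include <assert.h>
#include <stdint.h>

struct slub_page_counters {	/* stand-in for the anonymous struct added to struct page */
	uint16_t inuse;		/* objects currently allocated from this slab */
	uint16_t objects;	/* total objects this slab page can hold */
};

int main(void)
{
	/* Two u16 fields occupy the space of the old unsigned int counter. */
	static_assert(sizeof(struct slub_page_counters) == sizeof(unsigned int),
		      "counters fit in the old inuse slot");
	return 0;
}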
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 79d59c937fac..71e43a12ebbb 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -29,6 +29,7 @@ enum stat_item {
29 DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */ 29 DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
30 DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */ 30 DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
31 DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */ 31 DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
32 ORDER_FALLBACK, /* Number of times fallback was necessary */
32 NR_SLUB_STAT_ITEMS }; 33 NR_SLUB_STAT_ITEMS };
33 34
34struct kmem_cache_cpu { 35struct kmem_cache_cpu {
@@ -48,11 +49,21 @@ struct kmem_cache_node {
48 struct list_head partial; 49 struct list_head partial;
49#ifdef CONFIG_SLUB_DEBUG 50#ifdef CONFIG_SLUB_DEBUG
50 atomic_long_t nr_slabs; 51 atomic_long_t nr_slabs;
52 atomic_long_t total_objects;
51 struct list_head full; 53 struct list_head full;
52#endif 54#endif
53}; 55};
54 56
55/* 57/*
58 * Word size structure that can be atomically updated or read and that
59 * contains both the order and the number of objects that a slab of the
60 * given order would contain.
61 */
62struct kmem_cache_order_objects {
63 unsigned long x;
64};
65
66/*
56 * Slab cache management. 67 * Slab cache management.
57 */ 68 */
58struct kmem_cache { 69struct kmem_cache {
@@ -61,7 +72,7 @@ struct kmem_cache {
61 int size; /* The size of an object including meta data */ 72 int size; /* The size of an object including meta data */
62 int objsize; /* The size of an object without meta data */ 73 int objsize; /* The size of an object without meta data */
63 int offset; /* Free pointer offset. */ 74 int offset; /* Free pointer offset. */
64 int order; /* Current preferred allocation order */ 75 struct kmem_cache_order_objects oo;
65 76
66 /* 77 /*
67 * Avoid an extra cache line for UP, SMP and for the node local to 78 * Avoid an extra cache line for UP, SMP and for the node local to
@@ -70,7 +81,8 @@ struct kmem_cache {
70 struct kmem_cache_node local_node; 81 struct kmem_cache_node local_node;
71 82
72 /* Allocation and freeing of slabs */ 83 /* Allocation and freeing of slabs */
73 int objects; /* Number of objects in slab */ 84 struct kmem_cache_order_objects max;
85 struct kmem_cache_order_objects min;
74 gfp_t allocflags; /* gfp flags to use on each alloc */ 86 gfp_t allocflags; /* gfp flags to use on each alloc */
75 int refcount; /* Refcount for slab cache destroy */ 87 int refcount; /* Refcount for slab cache destroy */
76 void (*ctor)(struct kmem_cache *, void *); 88 void (*ctor)(struct kmem_cache *, void *);
diff --git a/mm/slob.c b/mm/slob.c
index e2c3c0ec5463..6038cbadf796 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -533,7 +533,8 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
533{ 533{
534 struct kmem_cache *c; 534 struct kmem_cache *c;
535 535
536 c = slob_alloc(sizeof(struct kmem_cache), flags, 0, -1); 536 c = slob_alloc(sizeof(struct kmem_cache),
537 flags, ARCH_KMALLOC_MINALIGN, -1);
537 538
538 if (c) { 539 if (c) {
539 c->name = name; 540 c->name = name;
diff --git a/mm/slub.c b/mm/slub.c
index 38914bc64aca..992ecd4f0d39 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -149,25 +149,6 @@ static inline void ClearSlabDebug(struct page *page)
149/* Enable to test recovery from slab corruption on boot */ 149/* Enable to test recovery from slab corruption on boot */
150#undef SLUB_RESILIENCY_TEST 150#undef SLUB_RESILIENCY_TEST
151 151
152#if PAGE_SHIFT <= 12
153
154/*
155 * Small page size. Make sure that we do not fragment memory
156 */
157#define DEFAULT_MAX_ORDER 1
158#define DEFAULT_MIN_OBJECTS 4
159
160#else
161
162/*
163 * Large page machines are customarily able to handle larger
164 * page orders.
165 */
166#define DEFAULT_MAX_ORDER 2
167#define DEFAULT_MIN_OBJECTS 8
168
169#endif
170
171/* 152/*
172 * Mininum number of partial slabs. These will be left on the partial 153 * Mininum number of partial slabs. These will be left on the partial
173 * lists even if they are empty. kmem_cache_shrink may reclaim them. 154 * lists even if they are empty. kmem_cache_shrink may reclaim them.
@@ -204,8 +185,6 @@ static inline void ClearSlabDebug(struct page *page)
204/* Internal SLUB flags */ 185/* Internal SLUB flags */
205#define __OBJECT_POISON 0x80000000 /* Poison object */ 186#define __OBJECT_POISON 0x80000000 /* Poison object */
206#define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ 187#define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */
207#define __KMALLOC_CACHE 0x20000000 /* objects freed using kfree */
208#define __PAGE_ALLOC_FALLBACK 0x10000000 /* Allow fallback to page alloc */
209 188
210static int kmem_size = sizeof(struct kmem_cache); 189static int kmem_size = sizeof(struct kmem_cache);
211 190
@@ -296,7 +275,7 @@ static inline int check_valid_pointer(struct kmem_cache *s,
296 return 1; 275 return 1;
297 276
298 base = page_address(page); 277 base = page_address(page);
299 if (object < base || object >= base + s->objects * s->size || 278 if (object < base || object >= base + page->objects * s->size ||
300 (object - base) % s->size) { 279 (object - base) % s->size) {
301 return 0; 280 return 0;
302 } 281 }
@@ -322,8 +301,8 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
322} 301}
323 302
324/* Loop over all objects in a slab */ 303/* Loop over all objects in a slab */
325#define for_each_object(__p, __s, __addr) \ 304#define for_each_object(__p, __s, __addr, __objects) \
326 for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\ 305 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
327 __p += (__s)->size) 306 __p += (__s)->size)
328 307
329/* Scan freelist */ 308/* Scan freelist */
@@ -336,6 +315,26 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
336 return (p - addr) / s->size; 315 return (p - addr) / s->size;
337} 316}
338 317
318static inline struct kmem_cache_order_objects oo_make(int order,
319 unsigned long size)
320{
321 struct kmem_cache_order_objects x = {
322 (order << 16) + (PAGE_SIZE << order) / size
323 };
324
325 return x;
326}
327
328static inline int oo_order(struct kmem_cache_order_objects x)
329{
330 return x.x >> 16;
331}
332
333static inline int oo_objects(struct kmem_cache_order_objects x)
334{
335 return x.x & ((1 << 16) - 1);
336}
337
339#ifdef CONFIG_SLUB_DEBUG 338#ifdef CONFIG_SLUB_DEBUG
340/* 339/*
341 * Debug settings: 340 * Debug settings:
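The oo_make()/oo_order()/oo_objects() helpers added here pack the page order and the number of objects that fit at that order into a single word, so both values can be read together consistently. A standalone worked example of the encoding (not kernel code; assumes 4096-byte pages):

#include <stdio.h>

#define PAGE_SIZE 4096UL

static unsigned long oo_make(int order, unsigned long size)
{
	/* high 16 bits: page order, low 16 bits: objects per slab */
	return ((unsigned long)order << 16) + (PAGE_SIZE << order) / size;
}

static int oo_order(unsigned long x)   { return x >> 16; }
static int oo_objects(unsigned long x) { return x & 0xffff; }

int main(void)
{
	unsigned long oo = oo_make(3, 256);

	/* prints "order=3 objects=128": an order-3 slab of 256-byte objects */
	printf("order=%d objects=%d\n", oo_order(oo), oo_objects(oo));
	return 0;
}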
@@ -446,8 +445,8 @@ static void print_tracking(struct kmem_cache *s, void *object)
446 445
447static void print_page_info(struct page *page) 446static void print_page_info(struct page *page)
448{ 447{
449 printk(KERN_ERR "INFO: Slab 0x%p used=%u fp=0x%p flags=0x%04lx\n", 448 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
450 page, page->inuse, page->freelist, page->flags); 449 page, page->objects, page->inuse, page->freelist, page->flags);
451 450
452} 451}
453 452
@@ -647,6 +646,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
647 p + off, POISON_INUSE, s->size - off); 646 p + off, POISON_INUSE, s->size - off);
648} 647}
649 648
649/* Check the pad bytes at the end of a slab page */
650static int slab_pad_check(struct kmem_cache *s, struct page *page) 650static int slab_pad_check(struct kmem_cache *s, struct page *page)
651{ 651{
652 u8 *start; 652 u8 *start;
@@ -659,20 +659,20 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
659 return 1; 659 return 1;
660 660
661 start = page_address(page); 661 start = page_address(page);
662 end = start + (PAGE_SIZE << s->order); 662 length = (PAGE_SIZE << compound_order(page));
663 length = s->objects * s->size; 663 end = start + length;
664 remainder = end - (start + length); 664 remainder = length % s->size;
665 if (!remainder) 665 if (!remainder)
666 return 1; 666 return 1;
667 667
668 fault = check_bytes(start + length, POISON_INUSE, remainder); 668 fault = check_bytes(end - remainder, POISON_INUSE, remainder);
669 if (!fault) 669 if (!fault)
670 return 1; 670 return 1;
671 while (end > fault && end[-1] == POISON_INUSE) 671 while (end > fault && end[-1] == POISON_INUSE)
672 end--; 672 end--;
673 673
674 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); 674 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
675 print_section("Padding", start, length); 675 print_section("Padding", end - remainder, remainder);
676 676
677 restore_bytes(s, "slab padding", POISON_INUSE, start, end); 677 restore_bytes(s, "slab padding", POISON_INUSE, start, end);
678 return 0; 678 return 0;
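With variable-order slabs, slab_pad_check() can no longer derive the padding from s->objects; the rewritten hunk takes the actual compound page length and treats everything past the last whole object as poison padding. A small sketch of that arithmetic (example sizes, 4096-byte pages assumed):

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096, order = 1, size = 720;
	unsigned long length = page_size << order;	/* 8192 bytes in the slab */
	unsigned long remainder = length % size;	/* 8192 % 720 = 272 pad bytes */

	/* 11 objects fit; the final 272 bytes stay POISON_INUSE padding. */
	printf("objects=%lu pad=%lu\n", length / size, remainder);
	return 0;
}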
@@ -734,15 +734,24 @@ static int check_object(struct kmem_cache *s, struct page *page,
734 734
735static int check_slab(struct kmem_cache *s, struct page *page) 735static int check_slab(struct kmem_cache *s, struct page *page)
736{ 736{
737 int maxobj;
738
737 VM_BUG_ON(!irqs_disabled()); 739 VM_BUG_ON(!irqs_disabled());
738 740
739 if (!PageSlab(page)) { 741 if (!PageSlab(page)) {
740 slab_err(s, page, "Not a valid slab page"); 742 slab_err(s, page, "Not a valid slab page");
741 return 0; 743 return 0;
742 } 744 }
743 if (page->inuse > s->objects) { 745
746 maxobj = (PAGE_SIZE << compound_order(page)) / s->size;
747 if (page->objects > maxobj) {
748 slab_err(s, page, "objects %u > max %u",
749 s->name, page->objects, maxobj);
750 return 0;
751 }
752 if (page->inuse > page->objects) {
744 slab_err(s, page, "inuse %u > max %u", 753 slab_err(s, page, "inuse %u > max %u",
745 s->name, page->inuse, s->objects); 754 s->name, page->inuse, page->objects);
746 return 0; 755 return 0;
747 } 756 }
748 /* Slab_pad_check fixes things up after itself */ 757 /* Slab_pad_check fixes things up after itself */
@@ -759,8 +768,9 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
759 int nr = 0; 768 int nr = 0;
760 void *fp = page->freelist; 769 void *fp = page->freelist;
761 void *object = NULL; 770 void *object = NULL;
771 unsigned long max_objects;
762 772
763 while (fp && nr <= s->objects) { 773 while (fp && nr <= page->objects) {
764 if (fp == search) 774 if (fp == search)
765 return 1; 775 return 1;
766 if (!check_valid_pointer(s, page, fp)) { 776 if (!check_valid_pointer(s, page, fp)) {
@@ -772,7 +782,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
772 } else { 782 } else {
773 slab_err(s, page, "Freepointer corrupt"); 783 slab_err(s, page, "Freepointer corrupt");
774 page->freelist = NULL; 784 page->freelist = NULL;
775 page->inuse = s->objects; 785 page->inuse = page->objects;
776 slab_fix(s, "Freelist cleared"); 786 slab_fix(s, "Freelist cleared");
777 return 0; 787 return 0;
778 } 788 }
@@ -783,10 +793,20 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
783 nr++; 793 nr++;
784 } 794 }
785 795
786 if (page->inuse != s->objects - nr) { 796 max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
797 if (max_objects > 65535)
798 max_objects = 65535;
799
800 if (page->objects != max_objects) {
801 slab_err(s, page, "Wrong number of objects. Found %d but "
802 "should be %d", page->objects, max_objects);
803 page->objects = max_objects;
804 slab_fix(s, "Number of objects adjusted.");
805 }
806 if (page->inuse != page->objects - nr) {
787 slab_err(s, page, "Wrong object count. Counter is %d but " 807 slab_err(s, page, "Wrong object count. Counter is %d but "
788 "counted were %d", page->inuse, s->objects - nr); 808 "counted were %d", page->inuse, page->objects - nr);
789 page->inuse = s->objects - nr; 809 page->inuse = page->objects - nr;
790 slab_fix(s, "Object count adjusted."); 810 slab_fix(s, "Object count adjusted.");
791 } 811 }
792 return search == NULL; 812 return search == NULL;
@@ -840,7 +860,7 @@ static inline unsigned long slabs_node(struct kmem_cache *s, int node)
840 return atomic_long_read(&n->nr_slabs); 860 return atomic_long_read(&n->nr_slabs);
841} 861}
842 862
843static inline void inc_slabs_node(struct kmem_cache *s, int node) 863static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
844{ 864{
845 struct kmem_cache_node *n = get_node(s, node); 865 struct kmem_cache_node *n = get_node(s, node);
846 866
@@ -850,14 +870,17 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node)
850 * dilemma by deferring the increment of the count during 870 * dilemma by deferring the increment of the count during
851 * bootstrap (see early_kmem_cache_node_alloc). 871 * bootstrap (see early_kmem_cache_node_alloc).
852 */ 872 */
853 if (!NUMA_BUILD || n) 873 if (!NUMA_BUILD || n) {
854 atomic_long_inc(&n->nr_slabs); 874 atomic_long_inc(&n->nr_slabs);
875 atomic_long_add(objects, &n->total_objects);
876 }
855} 877}
856static inline void dec_slabs_node(struct kmem_cache *s, int node) 878static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
857{ 879{
858 struct kmem_cache_node *n = get_node(s, node); 880 struct kmem_cache_node *n = get_node(s, node);
859 881
860 atomic_long_dec(&n->nr_slabs); 882 atomic_long_dec(&n->nr_slabs);
883 atomic_long_sub(objects, &n->total_objects);
861} 884}
862 885
863/* Object debug checks for alloc/free paths */ 886/* Object debug checks for alloc/free paths */
@@ -905,7 +928,7 @@ bad:
905 * as used avoids touching the remaining objects. 928 * as used avoids touching the remaining objects.
906 */ 929 */
907 slab_fix(s, "Marking all objects used"); 930 slab_fix(s, "Marking all objects used");
908 page->inuse = s->objects; 931 page->inuse = page->objects;
909 page->freelist = NULL; 932 page->freelist = NULL;
910 } 933 }
911 return 0; 934 return 0;
@@ -1055,31 +1078,52 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize,
1055 1078
1056static inline unsigned long slabs_node(struct kmem_cache *s, int node) 1079static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1057 { return 0; } 1080 { return 0; }
1058static inline void inc_slabs_node(struct kmem_cache *s, int node) {} 1081static inline void inc_slabs_node(struct kmem_cache *s, int node,
1059static inline void dec_slabs_node(struct kmem_cache *s, int node) {} 1082 int objects) {}
1083static inline void dec_slabs_node(struct kmem_cache *s, int node,
1084 int objects) {}
1060#endif 1085#endif
1086
1061/* 1087/*
1062 * Slab allocation and freeing 1088 * Slab allocation and freeing
1063 */ 1089 */
1090static inline struct page *alloc_slab_page(gfp_t flags, int node,
1091 struct kmem_cache_order_objects oo)
1092{
1093 int order = oo_order(oo);
1094
1095 if (node == -1)
1096 return alloc_pages(flags, order);
1097 else
1098 return alloc_pages_node(node, flags, order);
1099}
1100
1064static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) 1101static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1065{ 1102{
1066 struct page *page; 1103 struct page *page;
1067 int pages = 1 << s->order; 1104 struct kmem_cache_order_objects oo = s->oo;
1068 1105
1069 flags |= s->allocflags; 1106 flags |= s->allocflags;
1070 1107
1071 if (node == -1) 1108 page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node,
1072 page = alloc_pages(flags, s->order); 1109 oo);
1073 else 1110 if (unlikely(!page)) {
1074 page = alloc_pages_node(node, flags, s->order); 1111 oo = s->min;
1075 1112 /*
1076 if (!page) 1113 * Allocation may have failed due to fragmentation.
1077 return NULL; 1114 * Try a lower order alloc if possible
1115 */
1116 page = alloc_slab_page(flags, node, oo);
1117 if (!page)
1118 return NULL;
1078 1119
1120 stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK);
1121 }
1122 page->objects = oo_objects(oo);
1079 mod_zone_page_state(page_zone(page), 1123 mod_zone_page_state(page_zone(page),
1080 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1124 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1081 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1125 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1082 pages); 1126 1 << oo_order(oo));
1083 1127
1084 return page; 1128 return page;
1085} 1129}
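allocate_slab() now makes two attempts: the preferred order is requested with __GFP_NOWARN | __GFP_NORETRY so a fragmented page allocator fails quickly, and only then does it retry at s->min, the smallest order that still fits one object, bumping the new ORDER_FALLBACK statistic. A loose userspace analogue of that pattern (malloc stands in for the page allocator; sizes and names are illustrative only):

#include <stdio.h>
#include <stdlib.h>

static unsigned long order_fallbacks;	/* stand-in for the ORDER_FALLBACK stat */

static void *alloc_slab_mem(size_t preferred, size_t minimal)
{
	void *p = malloc(preferred);	/* kernel: preferred order, fail fast */

	if (!p) {
		p = malloc(minimal);	/* kernel: s->min, full reclaim allowed */
		if (p)
			order_fallbacks++;
	}
	return p;
}

int main(void)
{
	void *slab = alloc_slab_mem(8 * 4096, 4096);

	printf("slab=%p fallbacks=%lu\n", slab, order_fallbacks);
	free(slab);
	return 0;
}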
@@ -1106,7 +1150,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1106 if (!page) 1150 if (!page)
1107 goto out; 1151 goto out;
1108 1152
1109 inc_slabs_node(s, page_to_nid(page)); 1153 inc_slabs_node(s, page_to_nid(page), page->objects);
1110 page->slab = s; 1154 page->slab = s;
1111 page->flags |= 1 << PG_slab; 1155 page->flags |= 1 << PG_slab;
1112 if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | 1156 if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
@@ -1116,10 +1160,10 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1116 start = page_address(page); 1160 start = page_address(page);
1117 1161
1118 if (unlikely(s->flags & SLAB_POISON)) 1162 if (unlikely(s->flags & SLAB_POISON))
1119 memset(start, POISON_INUSE, PAGE_SIZE << s->order); 1163 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
1120 1164
1121 last = start; 1165 last = start;
1122 for_each_object(p, s, start) { 1166 for_each_object(p, s, start, page->objects) {
1123 setup_object(s, page, last); 1167 setup_object(s, page, last);
1124 set_freepointer(s, last, p); 1168 set_freepointer(s, last, p);
1125 last = p; 1169 last = p;
@@ -1135,13 +1179,15 @@ out:
1135 1179
1136static void __free_slab(struct kmem_cache *s, struct page *page) 1180static void __free_slab(struct kmem_cache *s, struct page *page)
1137{ 1181{
1138 int pages = 1 << s->order; 1182 int order = compound_order(page);
1183 int pages = 1 << order;
1139 1184
1140 if (unlikely(SlabDebug(page))) { 1185 if (unlikely(SlabDebug(page))) {
1141 void *p; 1186 void *p;
1142 1187
1143 slab_pad_check(s, page); 1188 slab_pad_check(s, page);
1144 for_each_object(p, s, page_address(page)) 1189 for_each_object(p, s, page_address(page),
1190 page->objects)
1145 check_object(s, page, p, 0); 1191 check_object(s, page, p, 0);
1146 ClearSlabDebug(page); 1192 ClearSlabDebug(page);
1147 } 1193 }
@@ -1153,7 +1199,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1153 1199
1154 __ClearPageSlab(page); 1200 __ClearPageSlab(page);
1155 reset_page_mapcount(page); 1201 reset_page_mapcount(page);
1156 __free_pages(page, s->order); 1202 __free_pages(page, order);
1157} 1203}
1158 1204
1159static void rcu_free_slab(struct rcu_head *h) 1205static void rcu_free_slab(struct rcu_head *h)
@@ -1179,7 +1225,7 @@ static void free_slab(struct kmem_cache *s, struct page *page)
1179 1225
1180static void discard_slab(struct kmem_cache *s, struct page *page) 1226static void discard_slab(struct kmem_cache *s, struct page *page)
1181{ 1227{
1182 dec_slabs_node(s, page_to_nid(page)); 1228 dec_slabs_node(s, page_to_nid(page), page->objects);
1183 free_slab(s, page); 1229 free_slab(s, page);
1184} 1230}
1185 1231
@@ -1515,7 +1561,7 @@ load_freelist:
1515 goto debug; 1561 goto debug;
1516 1562
1517 c->freelist = object[c->offset]; 1563 c->freelist = object[c->offset];
1518 c->page->inuse = s->objects; 1564 c->page->inuse = c->page->objects;
1519 c->page->freelist = NULL; 1565 c->page->freelist = NULL;
1520 c->node = page_to_nid(c->page); 1566 c->node = page_to_nid(c->page);
1521unlock_out: 1567unlock_out:
@@ -1552,27 +1598,6 @@ new_slab:
1552 c->page = new; 1598 c->page = new;
1553 goto load_freelist; 1599 goto load_freelist;
1554 } 1600 }
1555
1556 /*
1557 * No memory available.
1558 *
1559 * If the slab uses higher order allocs but the object is
1560 * smaller than a page size then we can fallback in emergencies
1561 * to the page allocator via kmalloc_large. The page allocator may
1562 * have failed to obtain a higher order page and we can try to
1563 * allocate a single page if the object fits into a single page.
1564 * That is only possible if certain conditions are met that are being
1565 * checked when a slab is created.
1566 */
1567 if (!(gfpflags & __GFP_NORETRY) &&
1568 (s->flags & __PAGE_ALLOC_FALLBACK)) {
1569 if (gfpflags & __GFP_WAIT)
1570 local_irq_enable();
1571 object = kmalloc_large(s->objsize, gfpflags);
1572 if (gfpflags & __GFP_WAIT)
1573 local_irq_disable();
1574 return object;
1575 }
1576 return NULL; 1601 return NULL;
1577debug: 1602debug:
1578 if (!alloc_debug_processing(s, c->page, object, addr)) 1603 if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1773,8 +1798,8 @@ static struct page *get_object_page(const void *x)
1773 * take the list_lock. 1798 * take the list_lock.
1774 */ 1799 */
1775static int slub_min_order; 1800static int slub_min_order;
1776static int slub_max_order = DEFAULT_MAX_ORDER; 1801static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
1777static int slub_min_objects = DEFAULT_MIN_OBJECTS; 1802static int slub_min_objects;
1778 1803
1779/* 1804/*
1780 * Merge control. If this is set then no merging of slab caches will occur. 1805 * Merge control. If this is set then no merging of slab caches will occur.
@@ -1789,7 +1814,7 @@ static int slub_nomerge;
1789 * system components. Generally order 0 allocations should be preferred since 1814 * system components. Generally order 0 allocations should be preferred since
1790 * order 0 does not cause fragmentation in the page allocator. Larger objects 1815 * order 0 does not cause fragmentation in the page allocator. Larger objects
1791 * be problematic to put into order 0 slabs because there may be too much 1816 * be problematic to put into order 0 slabs because there may be too much
1792 * unused space left. We go to a higher order if more than 1/8th of the slab 1817 * unused space left. We go to a higher order if more than 1/16th of the slab
1793 * would be wasted. 1818 * would be wasted.
1794 * 1819 *
1795 * In order to reach satisfactory performance we must ensure that a minimum 1820 * In order to reach satisfactory performance we must ensure that a minimum
@@ -1814,6 +1839,9 @@ static inline int slab_order(int size, int min_objects,
1814 int rem; 1839 int rem;
1815 int min_order = slub_min_order; 1840 int min_order = slub_min_order;
1816 1841
1842 if ((PAGE_SIZE << min_order) / size > 65535)
1843 return get_order(size * 65535) - 1;
1844
1817 for (order = max(min_order, 1845 for (order = max(min_order,
1818 fls(min_objects * size - 1) - PAGE_SHIFT); 1846 fls(min_objects * size - 1) - PAGE_SHIFT);
1819 order <= max_order; order++) { 1847 order <= max_order; order++) {
@@ -1848,8 +1876,10 @@ static inline int calculate_order(int size)
1848 * we reduce the minimum objects required in a slab. 1876 * we reduce the minimum objects required in a slab.
1849 */ 1877 */
1850 min_objects = slub_min_objects; 1878 min_objects = slub_min_objects;
1879 if (!min_objects)
1880 min_objects = 4 * (fls(nr_cpu_ids) + 1);
1851 while (min_objects > 1) { 1881 while (min_objects > 1) {
1852 fraction = 8; 1882 fraction = 16;
1853 while (fraction >= 4) { 1883 while (fraction >= 4) {
1854 order = slab_order(size, min_objects, 1884 order = slab_order(size, min_objects,
1855 slub_max_order, fraction); 1885 slub_max_order, fraction);
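calculate_order() now scales its target when slub_min_objects is left at 0: it aims for 4 * (fls(nr_cpu_ids) + 1) objects per slab and tolerates less waste (1/16th of the slab instead of 1/8th, per the comment change earlier in this diff). A userspace sketch of the processor-count heuristic (fls() below is a portable stand-in for the kernel helper):

#include <stdio.h>

static int fls(unsigned int x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;	/* position of the highest set bit, 0 if x == 0 */
}

int main(void)
{
	/* 1 CPU -> 8, 4 CPUs -> 16, 16 CPUs -> 24, 64 CPUs -> 32 objects */
	for (unsigned int cpus = 1; cpus <= 64; cpus *= 4)
		printf("nr_cpu_ids=%2u -> min_objects=%d\n",
		       cpus, 4 * (fls(cpus) + 1));
	return 0;
}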
@@ -2091,7 +2121,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
2091 init_tracking(kmalloc_caches, n); 2121 init_tracking(kmalloc_caches, n);
2092#endif 2122#endif
2093 init_kmem_cache_node(n); 2123 init_kmem_cache_node(n);
2094 inc_slabs_node(kmalloc_caches, node); 2124 inc_slabs_node(kmalloc_caches, node, page->objects);
2095 2125
2096 /* 2126 /*
2097 * lockdep requires consistent irq usage for each lock 2127 * lockdep requires consistent irq usage for each lock
@@ -2167,11 +2197,12 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
2167 * calculate_sizes() determines the order and the distribution of data within 2197 * calculate_sizes() determines the order and the distribution of data within
2168 * a slab object. 2198 * a slab object.
2169 */ 2199 */
2170static int calculate_sizes(struct kmem_cache *s) 2200static int calculate_sizes(struct kmem_cache *s, int forced_order)
2171{ 2201{
2172 unsigned long flags = s->flags; 2202 unsigned long flags = s->flags;
2173 unsigned long size = s->objsize; 2203 unsigned long size = s->objsize;
2174 unsigned long align = s->align; 2204 unsigned long align = s->align;
2205 int order;
2175 2206
2176 /* 2207 /*
2177 * Round up object size to the next word boundary. We can only 2208 * Round up object size to the next word boundary. We can only
@@ -2255,26 +2286,16 @@ static int calculate_sizes(struct kmem_cache *s)
2255 */ 2286 */
2256 size = ALIGN(size, align); 2287 size = ALIGN(size, align);
2257 s->size = size; 2288 s->size = size;
2289 if (forced_order >= 0)
2290 order = forced_order;
2291 else
2292 order = calculate_order(size);
2258 2293
2259 if ((flags & __KMALLOC_CACHE) && 2294 if (order < 0)
2260 PAGE_SIZE / size < slub_min_objects) {
2261 /*
2262 * Kmalloc cache that would not have enough objects in
2263 * an order 0 page. Kmalloc slabs can fallback to
2264 * page allocator order 0 allocs so take a reasonably large
2265 * order that will allows us a good number of objects.
2266 */
2267 s->order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER);
2268 s->flags |= __PAGE_ALLOC_FALLBACK;
2269 s->allocflags |= __GFP_NOWARN;
2270 } else
2271 s->order = calculate_order(size);
2272
2273 if (s->order < 0)
2274 return 0; 2295 return 0;
2275 2296
2276 s->allocflags = 0; 2297 s->allocflags = 0;
2277 if (s->order) 2298 if (order)
2278 s->allocflags |= __GFP_COMP; 2299 s->allocflags |= __GFP_COMP;
2279 2300
2280 if (s->flags & SLAB_CACHE_DMA) 2301 if (s->flags & SLAB_CACHE_DMA)
@@ -2286,9 +2307,12 @@ static int calculate_sizes(struct kmem_cache *s)
2286 /* 2307 /*
2287 * Determine the number of objects per slab 2308 * Determine the number of objects per slab
2288 */ 2309 */
2289 s->objects = (PAGE_SIZE << s->order) / size; 2310 s->oo = oo_make(order, size);
2311 s->min = oo_make(get_order(size), size);
2312 if (oo_objects(s->oo) > oo_objects(s->max))
2313 s->max = s->oo;
2290 2314
2291 return !!s->objects; 2315 return !!oo_objects(s->oo);
2292 2316
2293} 2317}
2294 2318
@@ -2304,7 +2328,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
2304 s->align = align; 2328 s->align = align;
2305 s->flags = kmem_cache_flags(size, flags, name, ctor); 2329 s->flags = kmem_cache_flags(size, flags, name, ctor);
2306 2330
2307 if (!calculate_sizes(s)) 2331 if (!calculate_sizes(s, -1))
2308 goto error; 2332 goto error;
2309 2333
2310 s->refcount = 1; 2334 s->refcount = 1;
@@ -2321,7 +2345,7 @@ error:
2321 if (flags & SLAB_PANIC) 2345 if (flags & SLAB_PANIC)
2322 panic("Cannot create slab %s size=%lu realsize=%u " 2346 panic("Cannot create slab %s size=%lu realsize=%u "
2323 "order=%u offset=%u flags=%lx\n", 2347 "order=%u offset=%u flags=%lx\n",
2324 s->name, (unsigned long)size, s->size, s->order, 2348 s->name, (unsigned long)size, s->size, oo_order(s->oo),
2325 s->offset, flags); 2349 s->offset, flags);
2326 return 0; 2350 return 0;
2327} 2351}
@@ -2367,26 +2391,52 @@ const char *kmem_cache_name(struct kmem_cache *s)
2367} 2391}
2368EXPORT_SYMBOL(kmem_cache_name); 2392EXPORT_SYMBOL(kmem_cache_name);
2369 2393
2394static void list_slab_objects(struct kmem_cache *s, struct page *page,
2395 const char *text)
2396{
2397#ifdef CONFIG_SLUB_DEBUG
2398 void *addr = page_address(page);
2399 void *p;
2400 DECLARE_BITMAP(map, page->objects);
2401
2402 bitmap_zero(map, page->objects);
2403 slab_err(s, page, "%s", text);
2404 slab_lock(page);
2405 for_each_free_object(p, s, page->freelist)
2406 set_bit(slab_index(p, s, addr), map);
2407
2408 for_each_object(p, s, addr, page->objects) {
2409
2410 if (!test_bit(slab_index(p, s, addr), map)) {
2411 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
2412 p, p - addr);
2413 print_tracking(s, p);
2414 }
2415 }
2416 slab_unlock(page);
2417#endif
2418}
2419
2370/* 2420/*
2371 * Attempt to free all slabs on a node. Return the number of slabs we 2421 * Attempt to free all partial slabs on a node.
2372 * were unable to free.
2373 */ 2422 */
2374static int free_list(struct kmem_cache *s, struct kmem_cache_node *n, 2423static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
2375 struct list_head *list)
2376{ 2424{
2377 int slabs_inuse = 0;
2378 unsigned long flags; 2425 unsigned long flags;
2379 struct page *page, *h; 2426 struct page *page, *h;
2380 2427
2381 spin_lock_irqsave(&n->list_lock, flags); 2428 spin_lock_irqsave(&n->list_lock, flags);
2382 list_for_each_entry_safe(page, h, list, lru) 2429 list_for_each_entry_safe(page, h, &n->partial, lru) {
2383 if (!page->inuse) { 2430 if (!page->inuse) {
2384 list_del(&page->lru); 2431 list_del(&page->lru);
2385 discard_slab(s, page); 2432 discard_slab(s, page);
2386 } else 2433 n->nr_partial--;
2387 slabs_inuse++; 2434 } else {
2435 list_slab_objects(s, page,
2436 "Objects remaining on kmem_cache_close()");
2437 }
2438 }
2388 spin_unlock_irqrestore(&n->list_lock, flags); 2439 spin_unlock_irqrestore(&n->list_lock, flags);
2389 return slabs_inuse;
2390} 2440}
2391 2441
2392/* 2442/*
@@ -2403,8 +2453,8 @@ static inline int kmem_cache_close(struct kmem_cache *s)
2403 for_each_node_state(node, N_NORMAL_MEMORY) { 2453 for_each_node_state(node, N_NORMAL_MEMORY) {
2404 struct kmem_cache_node *n = get_node(s, node); 2454 struct kmem_cache_node *n = get_node(s, node);
2405 2455
2406 n->nr_partial -= free_list(s, n, &n->partial); 2456 free_partial(s, n);
2407 if (slabs_node(s, node)) 2457 if (n->nr_partial || slabs_node(s, node))
2408 return 1; 2458 return 1;
2409 } 2459 }
2410 free_kmem_cache_nodes(s); 2460 free_kmem_cache_nodes(s);
@@ -2422,8 +2472,11 @@ void kmem_cache_destroy(struct kmem_cache *s)
2422 if (!s->refcount) { 2472 if (!s->refcount) {
2423 list_del(&s->list); 2473 list_del(&s->list);
2424 up_write(&slub_lock); 2474 up_write(&slub_lock);
2425 if (kmem_cache_close(s)) 2475 if (kmem_cache_close(s)) {
2426 WARN_ON(1); 2476 printk(KERN_ERR "SLUB %s: %s called for cache that "
2477 "still has objects.\n", s->name, __func__);
2478 dump_stack();
2479 }
2427 sysfs_slab_remove(s); 2480 sysfs_slab_remove(s);
2428 } else 2481 } else
2429 up_write(&slub_lock); 2482 up_write(&slub_lock);
@@ -2482,7 +2535,7 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
2482 2535
2483 down_write(&slub_lock); 2536 down_write(&slub_lock);
2484 if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, 2537 if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
2485 flags | __KMALLOC_CACHE, NULL)) 2538 flags, NULL))
2486 goto panic; 2539 goto panic;
2487 2540
2488 list_add(&s->list, &slab_caches); 2541 list_add(&s->list, &slab_caches);
@@ -2730,8 +2783,9 @@ int kmem_cache_shrink(struct kmem_cache *s)
2730 struct kmem_cache_node *n; 2783 struct kmem_cache_node *n;
2731 struct page *page; 2784 struct page *page;
2732 struct page *t; 2785 struct page *t;
2786 int objects = oo_objects(s->max);
2733 struct list_head *slabs_by_inuse = 2787 struct list_head *slabs_by_inuse =
2734 kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL); 2788 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
2735 unsigned long flags; 2789 unsigned long flags;
2736 2790
2737 if (!slabs_by_inuse) 2791 if (!slabs_by_inuse)
@@ -2744,7 +2798,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
2744 if (!n->nr_partial) 2798 if (!n->nr_partial)
2745 continue; 2799 continue;
2746 2800
2747 for (i = 0; i < s->objects; i++) 2801 for (i = 0; i < objects; i++)
2748 INIT_LIST_HEAD(slabs_by_inuse + i); 2802 INIT_LIST_HEAD(slabs_by_inuse + i);
2749 2803
2750 spin_lock_irqsave(&n->list_lock, flags); 2804 spin_lock_irqsave(&n->list_lock, flags);
@@ -2776,7 +2830,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
2776 * Rebuild the partial list with the slabs filled up most 2830 * Rebuild the partial list with the slabs filled up most
2777 * first and the least used slabs at the end. 2831 * first and the least used slabs at the end.
2778 */ 2832 */
2779 for (i = s->objects - 1; i >= 0; i--) 2833 for (i = objects - 1; i >= 0; i--)
2780 list_splice(slabs_by_inuse + i, n->partial.prev); 2834 list_splice(slabs_by_inuse + i, n->partial.prev);
2781 2835
2782 spin_unlock_irqrestore(&n->list_lock, flags); 2836 spin_unlock_irqrestore(&n->list_lock, flags);
@@ -2997,9 +3051,6 @@ static int slab_unmergeable(struct kmem_cache *s)
2997 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE)) 3051 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
2998 return 1; 3052 return 1;
2999 3053
3000 if ((s->flags & __PAGE_ALLOC_FALLBACK))
3001 return 1;
3002
3003 if (s->ctor) 3054 if (s->ctor)
3004 return 1; 3055 return 1;
3005 3056
@@ -3192,7 +3243,8 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3192} 3243}
3193 3244
3194#if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO) 3245#if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO)
3195static unsigned long count_partial(struct kmem_cache_node *n) 3246static unsigned long count_partial(struct kmem_cache_node *n,
3247 int (*get_count)(struct page *))
3196{ 3248{
3197 unsigned long flags; 3249 unsigned long flags;
3198 unsigned long x = 0; 3250 unsigned long x = 0;
@@ -3200,10 +3252,25 @@ static unsigned long count_partial(struct kmem_cache_node *n)
3200 3252
3201 spin_lock_irqsave(&n->list_lock, flags); 3253 spin_lock_irqsave(&n->list_lock, flags);
3202 list_for_each_entry(page, &n->partial, lru) 3254 list_for_each_entry(page, &n->partial, lru)
3203 x += page->inuse; 3255 x += get_count(page);
3204 spin_unlock_irqrestore(&n->list_lock, flags); 3256 spin_unlock_irqrestore(&n->list_lock, flags);
3205 return x; 3257 return x;
3206} 3258}
3259
3260static int count_inuse(struct page *page)
3261{
3262 return page->inuse;
3263}
3264
3265static int count_total(struct page *page)
3266{
3267 return page->objects;
3268}
3269
3270static int count_free(struct page *page)
3271{
3272 return page->objects - page->inuse;
3273}
3207#endif 3274#endif
3208 3275
3209#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) 3276#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
@@ -3218,7 +3285,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
3218 return 0; 3285 return 0;
3219 3286
3220 /* Now we know that a valid freelist exists */ 3287 /* Now we know that a valid freelist exists */
3221 bitmap_zero(map, s->objects); 3288 bitmap_zero(map, page->objects);
3222 3289
3223 for_each_free_object(p, s, page->freelist) { 3290 for_each_free_object(p, s, page->freelist) {
3224 set_bit(slab_index(p, s, addr), map); 3291 set_bit(slab_index(p, s, addr), map);
@@ -3226,7 +3293,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
3226 return 0; 3293 return 0;
3227 } 3294 }
3228 3295
3229 for_each_object(p, s, addr) 3296 for_each_object(p, s, addr, page->objects)
3230 if (!test_bit(slab_index(p, s, addr), map)) 3297 if (!test_bit(slab_index(p, s, addr), map))
3231 if (!check_object(s, page, p, 1)) 3298 if (!check_object(s, page, p, 1))
3232 return 0; 3299 return 0;
@@ -3292,7 +3359,7 @@ static long validate_slab_cache(struct kmem_cache *s)
3292{ 3359{
3293 int node; 3360 int node;
3294 unsigned long count = 0; 3361 unsigned long count = 0;
3295 unsigned long *map = kmalloc(BITS_TO_LONGS(s->objects) * 3362 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
3296 sizeof(unsigned long), GFP_KERNEL); 3363 sizeof(unsigned long), GFP_KERNEL);
3297 3364
3298 if (!map) 3365 if (!map)
@@ -3495,14 +3562,14 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s,
3495 struct page *page, enum track_item alloc) 3562 struct page *page, enum track_item alloc)
3496{ 3563{
3497 void *addr = page_address(page); 3564 void *addr = page_address(page);
3498 DECLARE_BITMAP(map, s->objects); 3565 DECLARE_BITMAP(map, page->objects);
3499 void *p; 3566 void *p;
3500 3567
3501 bitmap_zero(map, s->objects); 3568 bitmap_zero(map, page->objects);
3502 for_each_free_object(p, s, page->freelist) 3569 for_each_free_object(p, s, page->freelist)
3503 set_bit(slab_index(p, s, addr), map); 3570 set_bit(slab_index(p, s, addr), map);
3504 3571
3505 for_each_object(p, s, addr) 3572 for_each_object(p, s, addr, page->objects)
3506 if (!test_bit(slab_index(p, s, addr), map)) 3573 if (!test_bit(slab_index(p, s, addr), map))
3507 add_location(t, s, get_track(s, p, alloc)); 3574 add_location(t, s, get_track(s, p, alloc));
3508} 3575}
@@ -3592,22 +3659,23 @@ static int list_locations(struct kmem_cache *s, char *buf,
3592} 3659}
3593 3660
3594enum slab_stat_type { 3661enum slab_stat_type {
3595 SL_FULL, 3662 SL_ALL, /* All slabs */
3596 SL_PARTIAL, 3663 SL_PARTIAL, /* Only partially allocated slabs */
3597 SL_CPU, 3664 SL_CPU, /* Only slabs used for cpu caches */
3598 SL_OBJECTS 3665 SL_OBJECTS, /* Determine allocated objects not slabs */
3666 SL_TOTAL /* Determine object capacity not slabs */
3599}; 3667};
3600 3668
3601#define SO_FULL (1 << SL_FULL) 3669#define SO_ALL (1 << SL_ALL)
3602#define SO_PARTIAL (1 << SL_PARTIAL) 3670#define SO_PARTIAL (1 << SL_PARTIAL)
3603#define SO_CPU (1 << SL_CPU) 3671#define SO_CPU (1 << SL_CPU)
3604#define SO_OBJECTS (1 << SL_OBJECTS) 3672#define SO_OBJECTS (1 << SL_OBJECTS)
3673#define SO_TOTAL (1 << SL_TOTAL)
3605 3674
3606static ssize_t show_slab_objects(struct kmem_cache *s, 3675static ssize_t show_slab_objects(struct kmem_cache *s,
3607 char *buf, unsigned long flags) 3676 char *buf, unsigned long flags)
3608{ 3677{
3609 unsigned long total = 0; 3678 unsigned long total = 0;
3610 int cpu;
3611 int node; 3679 int node;
3612 int x; 3680 int x;
3613 unsigned long *nodes; 3681 unsigned long *nodes;
@@ -3618,56 +3686,60 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
3618 return -ENOMEM; 3686 return -ENOMEM;
3619 per_cpu = nodes + nr_node_ids; 3687 per_cpu = nodes + nr_node_ids;
3620 3688
3621 for_each_possible_cpu(cpu) { 3689 if (flags & SO_CPU) {
3622 struct page *page; 3690 int cpu;
3623 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
3624 3691
3625 if (!c) 3692 for_each_possible_cpu(cpu) {
3626 continue; 3693 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
3627 3694
3628 page = c->page; 3695 if (!c || c->node < 0)
3629 node = c->node; 3696 continue;
3630 if (node < 0) 3697
3631 continue; 3698 if (c->page) {
3632 if (page) { 3699 if (flags & SO_TOTAL)
3633 if (flags & SO_CPU) { 3700 x = c->page->objects;
3634 if (flags & SO_OBJECTS) 3701 else if (flags & SO_OBJECTS)
3635 x = page->inuse; 3702 x = c->page->inuse;
3636 else 3703 else
3637 x = 1; 3704 x = 1;
3705
3638 total += x; 3706 total += x;
3639 nodes[node] += x; 3707 nodes[c->node] += x;
3640 } 3708 }
3641 per_cpu[node]++; 3709 per_cpu[c->node]++;
3642 } 3710 }
3643 } 3711 }
3644 3712
3645 for_each_node_state(node, N_NORMAL_MEMORY) { 3713 if (flags & SO_ALL) {
3646 struct kmem_cache_node *n = get_node(s, node); 3714 for_each_node_state(node, N_NORMAL_MEMORY) {
3715 struct kmem_cache_node *n = get_node(s, node);
3716
3717 if (flags & SO_TOTAL)
3718 x = atomic_long_read(&n->total_objects);
3719 else if (flags & SO_OBJECTS)
3720 x = atomic_long_read(&n->total_objects) -
3721 count_partial(n, count_free);
3647 3722
3648 if (flags & SO_PARTIAL) {
3649 if (flags & SO_OBJECTS)
3650 x = count_partial(n);
3651 else 3723 else
3652 x = n->nr_partial; 3724 x = atomic_long_read(&n->nr_slabs);
3653 total += x; 3725 total += x;
3654 nodes[node] += x; 3726 nodes[node] += x;
3655 } 3727 }
3656 3728
3657 if (flags & SO_FULL) { 3729 } else if (flags & SO_PARTIAL) {
3658 int full_slabs = atomic_long_read(&n->nr_slabs) 3730 for_each_node_state(node, N_NORMAL_MEMORY) {
3659 - per_cpu[node] 3731 struct kmem_cache_node *n = get_node(s, node);
3660 - n->nr_partial;
3661 3732
3662 if (flags & SO_OBJECTS) 3733 if (flags & SO_TOTAL)
3663 x = full_slabs * s->objects; 3734 x = count_partial(n, count_total);
3735 else if (flags & SO_OBJECTS)
3736 x = count_partial(n, count_inuse);
3664 else 3737 else
3665 x = full_slabs; 3738 x = n->nr_partial;
3666 total += x; 3739 total += x;
3667 nodes[node] += x; 3740 nodes[node] += x;
3668 } 3741 }
3669 } 3742 }
3670
3671 x = sprintf(buf, "%lu", total); 3743 x = sprintf(buf, "%lu", total);
3672#ifdef CONFIG_NUMA 3744#ifdef CONFIG_NUMA
3673 for_each_node_state(node, N_NORMAL_MEMORY) 3745 for_each_node_state(node, N_NORMAL_MEMORY)
@@ -3682,14 +3754,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
3682static int any_slab_objects(struct kmem_cache *s) 3754static int any_slab_objects(struct kmem_cache *s)
3683{ 3755{
3684 int node; 3756 int node;
3685 int cpu;
3686
3687 for_each_possible_cpu(cpu) {
3688 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
3689
3690 if (c && c->page)
3691 return 1;
3692 }
3693 3757
3694 for_each_online_node(node) { 3758 for_each_online_node(node) {
3695 struct kmem_cache_node *n = get_node(s, node); 3759 struct kmem_cache_node *n = get_node(s, node);
@@ -3697,7 +3761,7 @@ static int any_slab_objects(struct kmem_cache *s)
3697 if (!n) 3761 if (!n)
3698 continue; 3762 continue;
3699 3763
3700 if (n->nr_partial || atomic_long_read(&n->nr_slabs)) 3764 if (atomic_read(&n->total_objects))
3701 return 1; 3765 return 1;
3702 } 3766 }
3703 return 0; 3767 return 0;
@@ -3739,15 +3803,27 @@ SLAB_ATTR_RO(object_size);
3739 3803
3740static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf) 3804static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
3741{ 3805{
3742 return sprintf(buf, "%d\n", s->objects); 3806 return sprintf(buf, "%d\n", oo_objects(s->oo));
3743} 3807}
3744SLAB_ATTR_RO(objs_per_slab); 3808SLAB_ATTR_RO(objs_per_slab);
3745 3809
3810static ssize_t order_store(struct kmem_cache *s,
3811 const char *buf, size_t length)
3812{
3813 int order = simple_strtoul(buf, NULL, 10);
3814
3815 if (order > slub_max_order || order < slub_min_order)
3816 return -EINVAL;
3817
3818 calculate_sizes(s, order);
3819 return length;
3820}
3821
3746static ssize_t order_show(struct kmem_cache *s, char *buf) 3822static ssize_t order_show(struct kmem_cache *s, char *buf)
3747{ 3823{
3748 return sprintf(buf, "%d\n", s->order); 3824 return sprintf(buf, "%d\n", oo_order(s->oo));
3749} 3825}
3750SLAB_ATTR_RO(order); 3826SLAB_ATTR(order);
3751 3827
3752static ssize_t ctor_show(struct kmem_cache *s, char *buf) 3828static ssize_t ctor_show(struct kmem_cache *s, char *buf)
3753{ 3829{
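The order_store() attribute added above makes the slab order a per-cache runtime tunable: a value inside [slub_min_order, slub_max_order] reruns calculate_sizes() with that order forced, anything else is rejected with -EINVAL. A sketch of driving the attribute from userspace (the path follows SLUB's /sys/kernel/slab/<cache>/ layout; the cache name and order value are example choices):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/slab/kmalloc-4096/order", "w");

	if (!f) {
		perror("open order attribute");
		return 1;
	}
	fprintf(f, "2\n");	/* request order-2 slabs for this cache */
	fclose(f);
	return 0;
}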
@@ -3768,7 +3844,7 @@ SLAB_ATTR_RO(aliases);
3768 3844
3769static ssize_t slabs_show(struct kmem_cache *s, char *buf) 3845static ssize_t slabs_show(struct kmem_cache *s, char *buf)
3770{ 3846{
3771 return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); 3847 return show_slab_objects(s, buf, SO_ALL);
3772} 3848}
3773SLAB_ATTR_RO(slabs); 3849SLAB_ATTR_RO(slabs);
3774 3850
@@ -3786,10 +3862,22 @@ SLAB_ATTR_RO(cpu_slabs);
3786 3862
3787static ssize_t objects_show(struct kmem_cache *s, char *buf) 3863static ssize_t objects_show(struct kmem_cache *s, char *buf)
3788{ 3864{
3789 return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); 3865 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
3790} 3866}
3791SLAB_ATTR_RO(objects); 3867SLAB_ATTR_RO(objects);
3792 3868
3869static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
3870{
3871 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
3872}
3873SLAB_ATTR_RO(objects_partial);
3874
3875static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
3876{
3877 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
3878}
3879SLAB_ATTR_RO(total_objects);
3880
3793static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) 3881static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
3794{ 3882{
3795 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); 3883 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
@@ -3869,7 +3957,7 @@ static ssize_t red_zone_store(struct kmem_cache *s,
3869 s->flags &= ~SLAB_RED_ZONE; 3957 s->flags &= ~SLAB_RED_ZONE;
3870 if (buf[0] == '1') 3958 if (buf[0] == '1')
3871 s->flags |= SLAB_RED_ZONE; 3959 s->flags |= SLAB_RED_ZONE;
3872 calculate_sizes(s); 3960 calculate_sizes(s, -1);
3873 return length; 3961 return length;
3874} 3962}
3875SLAB_ATTR(red_zone); 3963SLAB_ATTR(red_zone);
@@ -3888,7 +3976,7 @@ static ssize_t poison_store(struct kmem_cache *s,
3888 s->flags &= ~SLAB_POISON; 3976 s->flags &= ~SLAB_POISON;
3889 if (buf[0] == '1') 3977 if (buf[0] == '1')
3890 s->flags |= SLAB_POISON; 3978 s->flags |= SLAB_POISON;
3891 calculate_sizes(s); 3979 calculate_sizes(s, -1);
3892 return length; 3980 return length;
3893} 3981}
3894SLAB_ATTR(poison); 3982SLAB_ATTR(poison);
@@ -3907,7 +3995,7 @@ static ssize_t store_user_store(struct kmem_cache *s,
3907 s->flags &= ~SLAB_STORE_USER; 3995 s->flags &= ~SLAB_STORE_USER;
3908 if (buf[0] == '1') 3996 if (buf[0] == '1')
3909 s->flags |= SLAB_STORE_USER; 3997 s->flags |= SLAB_STORE_USER;
3910 calculate_sizes(s); 3998 calculate_sizes(s, -1);
3911 return length; 3999 return length;
3912} 4000}
3913SLAB_ATTR(store_user); 4001SLAB_ATTR(store_user);
@@ -4038,7 +4126,7 @@ STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
4038STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); 4126STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
4039STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); 4127STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
4040STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); 4128STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
4041 4129STAT_ATTR(ORDER_FALLBACK, order_fallback);
4042#endif 4130#endif
4043 4131
4044static struct attribute *slab_attrs[] = { 4132static struct attribute *slab_attrs[] = {
@@ -4047,6 +4135,8 @@ static struct attribute *slab_attrs[] = {
4047 &objs_per_slab_attr.attr, 4135 &objs_per_slab_attr.attr,
4048 &order_attr.attr, 4136 &order_attr.attr,
4049 &objects_attr.attr, 4137 &objects_attr.attr,
4138 &objects_partial_attr.attr,
4139 &total_objects_attr.attr,
4050 &slabs_attr.attr, 4140 &slabs_attr.attr,
4051 &partial_attr.attr, 4141 &partial_attr.attr,
4052 &cpu_slabs_attr.attr, 4142 &cpu_slabs_attr.attr,
@@ -4089,6 +4179,7 @@ static struct attribute *slab_attrs[] = {
4089 &deactivate_to_head_attr.attr, 4179 &deactivate_to_head_attr.attr,
4090 &deactivate_to_tail_attr.attr, 4180 &deactivate_to_tail_attr.attr,
4091 &deactivate_remote_frees_attr.attr, 4181 &deactivate_remote_frees_attr.attr,
4182 &order_fallback_attr.attr,
4092#endif 4183#endif
4093 NULL 4184 NULL
4094}; 4185};
@@ -4375,7 +4466,8 @@ static int s_show(struct seq_file *m, void *p)
4375 unsigned long nr_partials = 0; 4466 unsigned long nr_partials = 0;
4376 unsigned long nr_slabs = 0; 4467 unsigned long nr_slabs = 0;
4377 unsigned long nr_inuse = 0; 4468 unsigned long nr_inuse = 0;
4378 unsigned long nr_objs; 4469 unsigned long nr_objs = 0;
4470 unsigned long nr_free = 0;
4379 struct kmem_cache *s; 4471 struct kmem_cache *s;
4380 int node; 4472 int node;
4381 4473
@@ -4389,14 +4481,15 @@ static int s_show(struct seq_file *m, void *p)
4389 4481
4390 nr_partials += n->nr_partial; 4482 nr_partials += n->nr_partial;
4391 nr_slabs += atomic_long_read(&n->nr_slabs); 4483 nr_slabs += atomic_long_read(&n->nr_slabs);
4392 nr_inuse += count_partial(n); 4484 nr_objs += atomic_long_read(&n->total_objects);
4485 nr_free += count_partial(n, count_free);
4393 } 4486 }
4394 4487
4395 nr_objs = nr_slabs * s->objects; 4488 nr_inuse = nr_objs - nr_free;
4396 nr_inuse += (nr_slabs - nr_partials) * s->objects;
4397 4489
4398 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse, 4490 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
4399 nr_objs, s->size, s->objects, (1 << s->order)); 4491 nr_objs, s->size, oo_objects(s->oo),
4492 (1 << oo_order(s->oo)));
4400 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0); 4493 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
4401 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs, 4494 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
4402 0UL); 4495 0UL);