| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-11 21:52:23 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-11 21:52:23 -0500 |
| commit | 6296e5d3c067df41980a5fd09ad4cc6765f79bb9 (patch) | |
| tree | ac10bc5321ac1d750612c0e0ae53d6c4097c5734 /mm | |
| parent | c086ae4ed94f9a1d283318e006813268c2dbf9fc (diff) | |
| parent | 5878cf431ca7233a56819ca6970153ac0b129599 (diff) | |
Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux:
slub: disallow changing cpu_partial from userspace for debug caches
slub: add missed accounting
slub: Extract get_freelist from __slab_alloc
slub: Switch per cpu partial page support off for debugging
slub: fix a possible memleak in __slab_alloc()
slub: fix slub_max_order Documentation
slub: add missed accounting
slab: add taint flag outputting to debug paths.
slub: add taint flag outputting to debug paths
slab: introduce slab_max_order kernel parameter
slab: rename slab_break_gfp_order to slab_max_order
Diffstat (limited to 'mm')

-rw-r--r-- | mm/slab.c | 39
-rw-r--r-- | mm/slub.c | 77

2 files changed, 75 insertions, 41 deletions
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -481,11 +481,13 @@ EXPORT_SYMBOL(slab_buffer_size);
 #endif
 
 /*
- * Do not go above this order unless 0 objects fit into the slab.
+ * Do not go above this order unless 0 objects fit into the slab or
+ * overridden on the command line.
  */
-#define	BREAK_GFP_ORDER_HI	1
-#define	BREAK_GFP_ORDER_LO	0
-static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+#define	SLAB_MAX_ORDER_HI	1
+#define	SLAB_MAX_ORDER_LO	0
+static int slab_max_order = SLAB_MAX_ORDER_LO;
+static bool slab_max_order_set __initdata;
 
 /*
  * Functions for storing/retrieving the cachep and or slab from the page
@@ -854,6 +856,17 @@ static int __init noaliencache_setup(char *s)
 }
 __setup("noaliencache", noaliencache_setup);
 
+static int __init slab_max_order_setup(char *str)
+{
+	get_option(&str, &slab_max_order);
+	slab_max_order = slab_max_order < 0 ? 0 :
+				min(slab_max_order, MAX_ORDER - 1);
+	slab_max_order_set = true;
+
+	return 1;
+}
+__setup("slab_max_order=", slab_max_order_setup);
+
 #ifdef CONFIG_NUMA
 /*
  * Special reaping functions for NUMA systems called from cache_reap().
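As a side note, the clamping in slab_max_order_setup() is easy to model outside the kernel. The sketch below is a user-space approximation, not kernel code: it assumes MAX_ORDER is 11, as on typical 4KB-page configurations, and substitutes strtol() for get_option().

```c
/* User-space model of the slab_max_order= clamping above. */
#include <stdio.h>
#include <stdlib.h>

#define MAX_ORDER 11	/* assumption: common value for 4KB-page builds */

static int clamp_slab_max_order(const char *str)
{
	int order = (int)strtol(str, NULL, 10);	/* stands in for get_option() */

	/* Negative input falls back to 0; large input is capped at MAX_ORDER - 1. */
	if (order < 0)
		return 0;
	return order < MAX_ORDER - 1 ? order : MAX_ORDER - 1;
}

int main(void)
{
	const char *inputs[] = { "-1", "0", "3", "99" };

	for (unsigned int i = 0; i < sizeof(inputs) / sizeof(inputs[0]); i++)
		printf("slab_max_order=%s -> %d\n", inputs[i],
		       clamp_slab_max_order(inputs[i]));
	return 0;	/* prints 0, 0, 3, 10 */
}
```

Booting with slab_max_order=3 on the kernel command line therefore caps SLAB page orders at 3 and also suppresses the 32MB-based default raise applied later in kmem_cache_init().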
@@ -1502,10 +1515,11 @@ void __init kmem_cache_init(void)
 
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
-	 * page orders on machines with more than 32MB of memory.
+	 * page orders on machines with more than 32MB of memory if
+	 * not overridden on the command line.
 	 */
-	if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
-		slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+	if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
+		slab_max_order = SLAB_MAX_ORDER_HI;
 
 	/* Bootstrap is tricky, because several objects are allocated
 	 * from caches that do not exist yet:
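For a sense of scale, the threshold in that condition is expressed in pages; assuming 4KB pages (a PAGE_SHIFT of 12, an assumption for this example) it amounts to 8192 pages:

```c
#include <stdio.h>

int main(void)
{
	const unsigned int page_shift = 12;	/* assumed: 4KB pages */

	/* Mirrors the (32 << 20) >> PAGE_SHIFT comparison above. */
	printf("%lu pages\n", (32UL << 20) >> page_shift);	/* 8192 pages == 32MB */
	return 0;
}
```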
@@ -1932,8 +1946,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 			/* Print header */
 			if (lines == 0) {
 				printk(KERN_ERR
-					"Slab corruption: %s start=%p, len=%d\n",
-					cachep->name, realobj, size);
+					"Slab corruption (%s): %s start=%p, len=%d\n",
+					print_tainted(), cachep->name, realobj, size);
 				print_objinfo(cachep, objp, 0);
 			}
 			/* Hexdump the affected line */
@@ -2117,7 +2131,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 		 * Large number of objects is good, but very large slabs are
 		 * currently bad for the gfp()s.
 		 */
-		if (gfporder >= slab_break_gfp_order)
+		if (gfporder >= slab_max_order)
 			break;
 
 		/*
@@ -3042,8 +3056,9 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
 	if (entries != cachep->num - slabp->inuse) {
 bad:
 		printk(KERN_ERR "slab: Internal list corruption detected in "
-				"cache '%s'(%d), slabp %p(%d). Hexdump:\n",
-			cachep->name, cachep->num, slabp, slabp->inuse);
+				"cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
+			cachep->name, cachep->num, slabp, slabp->inuse,
+			print_tainted());
 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
 			sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
 			1);
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -570,7 +570,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	va_end(args);
 	printk(KERN_ERR "========================================"
 			"=====================================\n");
-	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
+	printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
 	printk(KERN_ERR "----------------------------------------"
 			"-------------------------------------\n\n");
 }
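For context, print_tainted() returns either "Not tainted" or a "Tainted: ..." flag string, so after this change the header of a SLUB error report reads along the lines of "BUG kmalloc-64 (Not tainted): Redzone overwritten" (the cache name and message here are hypothetical examples).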
@@ -1901,11 +1901,14 @@ static void unfreeze_partials(struct kmem_cache *s)
 			}
 
 			if (l != m) {
-				if (l == M_PARTIAL)
+				if (l == M_PARTIAL) {
 					remove_partial(n, page);
-				else
+					stat(s, FREE_REMOVE_PARTIAL);
+				} else {
 					add_partial(n, page,
 						DEACTIVATE_TO_TAIL);
+					stat(s, FREE_ADD_PARTIAL);
+				}
 
 				l = m;
 			}
@@ -2124,6 +2127,37 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
 }
 
 /*
+ * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist
+ * or deactivate the page.
+ *
+ * The page is still frozen if the return value is not NULL.
+ *
+ * If this function returns NULL then the page has been unfrozen.
+ */
+static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+{
+	struct page new;
+	unsigned long counters;
+	void *freelist;
+
+	do {
+		freelist = page->freelist;
+		counters = page->counters;
+		new.counters = counters;
+		VM_BUG_ON(!new.frozen);
+
+		new.inuse = page->objects;
+		new.frozen = freelist != NULL;
+
+	} while (!cmpxchg_double_slab(s, page,
+		freelist, counters,
+		NULL, new.counters,
+		"get_freelist"));
+
+	return freelist;
+}
+
+/*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
  *
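The essence of get_freelist() — atomically detach whatever is left on page->freelist, or discover that it is empty — can be sketched with plain C11 atomics. This is a deliberately simplified user-space model: the real function updates freelist and counters (inuse/frozen) together via cmpxchg_double_slab(), which a single-word compare-and-swap cannot reproduce, so the frozen bookkeeping is omitted here.

```c
/* Simplified user-space model of "take the whole freelist atomically". */
#include <stdatomic.h>
#include <stdio.h>

struct object { struct object *next; };

struct fake_page {
	_Atomic(struct object *) freelist;	/* chain of free objects */
};

/*
 * Detach and return the entire freelist; NULL means it was already empty
 * (the point at which the real code unfreezes the page).
 */
static struct object *take_freelist(struct fake_page *page)
{
	struct object *freelist = atomic_load(&page->freelist);

	/* Retry loop mirroring the do { ... } while (!cmpxchg...) shape. */
	while (freelist &&
	       !atomic_compare_exchange_weak(&page->freelist, &freelist, NULL))
		;	/* a failed CAS reloads freelist; try again */

	return freelist;
}

int main(void)
{
	struct object objs[3] = { { &objs[1] }, { &objs[2] }, { NULL } };
	struct fake_page page = { .freelist = &objs[0] };
	struct object *list = take_freelist(&page);
	int n = 0;

	for (struct object *o = list; o; o = o->next)
		n++;
	printf("took %d objects, page freelist is now %s\n",
	       n, atomic_load(&page.freelist) ? "non-empty" : "empty");
	return 0;	/* took 3 objects, page freelist is now empty */
}
```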
@@ -2144,8 +2178,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
 	void **object;
 	unsigned long flags;
-	struct page new;
-	unsigned long counters;
 
 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPT
@@ -2166,31 +2198,14 @@ redo:
 		goto new_slab;
 	}
 
-	stat(s, ALLOC_SLOWPATH);
-
-	do {
-		object = c->page->freelist;
-		counters = c->page->counters;
-		new.counters = counters;
-		VM_BUG_ON(!new.frozen);
-
-		/*
-		 * If there is no object left then we use this loop to
-		 * deactivate the slab which is simple since no objects
-		 * are left in the slab and therefore we do not need to
-		 * put the page back onto the partial list.
-		 *
-		 * If there are objects left then we retrieve them
-		 * and use them to refill the per cpu queue.
-		 */
+	/* must check again c->freelist in case of cpu migration or IRQ */
+	object = c->freelist;
+	if (object)
+		goto load_freelist;
 
-		new.inuse = c->page->objects;
-		new.frozen = object != NULL;
+	stat(s, ALLOC_SLOWPATH);
 
-	} while (!__cmpxchg_double_slab(s, c->page,
-			object, counters,
-			NULL, new.counters,
-			"__slab_alloc"));
+	object = get_freelist(s, c->page);
 
 	if (!object) {
 		c->page = NULL;
@@ -3028,7 +3043,9 @@ static int kmem_cache_open(struct kmem_cache *s,
 	 * per node list when we run out of per cpu objects. We only fetch 50%
 	 * to keep some capacity around for frees.
 	 */
-	if (s->size >= PAGE_SIZE)
+	if (kmem_cache_debug(s))
+		s->cpu_partial = 0;
+	else if (s->size >= PAGE_SIZE)
 		s->cpu_partial = 2;
 	else if (s->size >= 1024)
 		s->cpu_partial = 6;
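The new default can be read as a small decision ladder: debug caches get no per-cpu partial pages at all (every free must take the slow path so the consistency checks actually run), while non-debug caches keep the size-based defaults. Below is a hedged sketch of just the branches visible in this hunk; PAGE_SIZE is assumed to be 4096, and the tiers for smaller objects lie outside the hunk and are elided.

```c
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096	/* assumption for the example */

/* Models only the branches shown in the hunk above. */
static unsigned int default_cpu_partial(size_t size, bool debug_cache)
{
	if (debug_cache)
		return 0;	/* debugging bypasses per-cpu partial pages */
	if (size >= PAGE_SIZE)
		return 2;
	if (size >= 1024)
		return 6;
	return 6;	/* placeholder: the real ladder continues for smaller sizes */
}

int main(void)
{
	printf("%u %u %u\n",
	       default_cpu_partial(8192, false),	/* 2 */
	       default_cpu_partial(2048, false),	/* 6 */
	       default_cpu_partial(2048, true));	/* 0 */
	return 0;
}
```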
@@ -4637,6 +4654,8 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 	err = strict_strtoul(buf, 10, &objects);
 	if (err)
 		return err;
+	if (objects && kmem_cache_debug(s))
+		return -EINVAL;
 
 	s->cpu_partial = objects;
 	flush_all(s);
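The companion check in cpu_partial_store() means that, for a cache created with debug flags, writing any non-zero value to its cpu_partial file under /sys/kernel/slab/ now fails with EINVAL, while writing 0 is still accepted; this keeps userspace from re-enabling the per-cpu partial pages that the kmem_cache_open() defaults above deliberately switch off.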