 Documentation/kernel-parameters.txt |  6
 Documentation/vm/slub.txt           |  2
 mm/slab.c                           | 39
 mm/slub.c                           | 77
 4 files changed, 82 insertions(+), 42 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c92b1532f05a..a8d389d72405 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2395,6 +2395,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	slram=		[HW,MTD]
 
+	slab_max_order=	[MM, SLAB]
+			Determines the maximum allowed order for slabs.
+			A high setting may cause OOMs due to memory
+			fragmentation. Defaults to 1 for systems with
+			more than 32MB of RAM, 0 otherwise.
+
 	slub_debug[=options[,slabs]]	[MM, SLUB]
 			Enabling slub_debug allows one to determine the
 			culprit if slab objects become corrupted. Enabling
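
For orientation, booting with e.g. "slab_max_order=2" overrides the default described above. The snippet below is a minimal standalone sketch (not kernel code) of the clamping behaviour this entry documents and which the parser added to mm/slab.c further down implements; the MAX_ORDER value of 11 is an assumption matching common default configurations.

/* Illustrative sketch only: how an out-of-range command line value is
 * expected to be treated. */
#define MAX_ORDER 11			/* assumed typical configuration */

static int clamp_slab_max_order(int requested)
{
	if (requested < 0)		/* negative input falls back to order 0 */
		return 0;
	if (requested > MAX_ORDER - 1)	/* never exceed the page allocator limit */
		return MAX_ORDER - 1;
	return requested;
}
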
diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
index f464f47bc60d..2acdda9601b0 100644
--- a/Documentation/vm/slub.txt
+++ b/Documentation/vm/slub.txt
@@ -117,7 +117,7 @@ can be influenced by kernel parameters:
 
 slub_min_objects=x		(default 4)
 slub_min_order=x		(default 0)
-slub_max_order=x		(default 1)
+slub_max_order=x		(default 3 (PAGE_ALLOC_COSTLY_ORDER))
 
 slub_min_objects allows to specify how many objects must at least fit
 into one slab in order for the allocation order to be acceptable.
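
As a rough illustration of how these knobs interact, the sketch below picks the lowest page order that fits at least slub_min_objects objects of a given size without exceeding slub_max_order. This is a deliberate simplification for illustration, not SLUB's real slab_order(), which additionally weighs wasted space per slab.

/* Simplified sketch only: smallest order holding min_objects objects of
 * the given size, capped at max_order (now 3, PAGE_ALLOC_COSTLY_ORDER).
 * PAGE_SIZE is the kernel's page size macro (typically 4096). */
static unsigned int pick_order(unsigned int size, unsigned int min_objects,
			       unsigned int min_order, unsigned int max_order)
{
	unsigned int order;

	for (order = min_order; order < max_order; order++)
		if ((PAGE_SIZE << order) / size >= min_objects)
			break;
	return order;
}
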
diff --git a/mm/slab.c b/mm/slab.c
index 2acfa0d90943..f0bd7857ab3b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -481,11 +481,13 @@ EXPORT_SYMBOL(slab_buffer_size);
 #endif
 
 /*
- * Do not go above this order unless 0 objects fit into the slab.
+ * Do not go above this order unless 0 objects fit into the slab or
+ * overridden on the command line.
  */
-#define	BREAK_GFP_ORDER_HI	1
-#define	BREAK_GFP_ORDER_LO	0
-static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+#define	SLAB_MAX_ORDER_HI	1
+#define	SLAB_MAX_ORDER_LO	0
+static int slab_max_order = SLAB_MAX_ORDER_LO;
+static bool slab_max_order_set __initdata;
 
 /*
  * Functions for storing/retrieving the cachep and or slab from the page
@@ -854,6 +856,17 @@ static int __init noaliencache_setup(char *s)
 }
 __setup("noaliencache", noaliencache_setup);
 
+static int __init slab_max_order_setup(char *str)
+{
+	get_option(&str, &slab_max_order);
+	slab_max_order = slab_max_order < 0 ? 0 :
+				min(slab_max_order, MAX_ORDER - 1);
+	slab_max_order_set = true;
+
+	return 1;
+}
+__setup("slab_max_order=", slab_max_order_setup);
+
 #ifdef CONFIG_NUMA
 /*
  * Special reaping functions for NUMA systems called from cache_reap().
@@ -1502,10 +1515,11 @@ void __init kmem_cache_init(void)
 
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
-	 * page orders on machines with more than 32MB of memory.
+	 * page orders on machines with more than 32MB of memory if
+	 * not overridden on the command line.
 	 */
-	if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
-		slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+	if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
+		slab_max_order = SLAB_MAX_ORDER_HI;
 
 	/* Bootstrap is tricky, because several objects are allocated
 	 * from caches that do not exist yet:
@@ -1932,8 +1946,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 			/* Print header */
 			if (lines == 0) {
 				printk(KERN_ERR
-					"Slab corruption: %s start=%p, len=%d\n",
-					cachep->name, realobj, size);
+					"Slab corruption (%s): %s start=%p, len=%d\n",
+					print_tainted(), cachep->name, realobj, size);
 				print_objinfo(cachep, objp, 0);
 			}
 			/* Hexdump the affected line */
@@ -2117,7 +2131,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 		 * Large number of objects is good, but very large slabs are
 		 * currently bad for the gfp()s.
 		 */
-		if (gfporder >= slab_break_gfp_order)
+		if (gfporder >= slab_max_order)
 			break;
 
 		/*
@@ -3042,8 +3056,9 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
 	if (entries != cachep->num - slabp->inuse) {
 bad:
 		printk(KERN_ERR "slab: Internal list corruption detected in "
-				"cache '%s'(%d), slabp %p(%d). Hexdump:\n",
-			cachep->name, cachep->num, slabp, slabp->inuse);
+				"cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
+			cachep->name, cachep->num, slabp, slabp->inuse,
+			print_tainted());
 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
 			sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
 			1);
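
The print_tainted() additions above follow the usual kernel diagnostic pattern: include the taint state in corruption reports so a bug report shows whether, say, out-of-tree modules or earlier warnings were involved. A minimal sketch of the same idiom in a hypothetical error path ("mydrv" and report_corruption() are placeholders; print_tainted() is the real kernel helper):

/* Hypothetical example of the pattern applied above. */
static void report_corruption(void *buf)
{
	printk(KERN_ERR "mydrv: buffer corruption (%s) at %p\n",
	       print_tainted(), buf);
}
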
diff --git a/mm/slub.c b/mm/slub.c
index d99acbf14e01..5d37b5e44140 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -570,7 +570,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	va_end(args);
 	printk(KERN_ERR "========================================"
 			"=====================================\n");
-	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
+	printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
 	printk(KERN_ERR "----------------------------------------"
 			"-------------------------------------\n\n");
 }
@@ -1901,11 +1901,14 @@ static void unfreeze_partials(struct kmem_cache *s)
 			}
 
 			if (l != m) {
-				if (l == M_PARTIAL)
+				if (l == M_PARTIAL) {
 					remove_partial(n, page);
-				else
+					stat(s, FREE_REMOVE_PARTIAL);
+				} else {
 					add_partial(n, page,
 						DEACTIVATE_TO_TAIL);
+					stat(s, FREE_ADD_PARTIAL);
+				}
 
 				l = m;
 			}
@@ -2124,6 +2127,37 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
 }
 
 /*
+ * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist
+ * or deactivate the page.
+ *
+ * The page is still frozen if the return value is not NULL.
+ *
+ * If this function returns NULL then the page has been unfrozen.
+ */
+static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+{
+	struct page new;
+	unsigned long counters;
+	void *freelist;
+
+	do {
+		freelist = page->freelist;
+		counters = page->counters;
+		new.counters = counters;
+		VM_BUG_ON(!new.frozen);
+
+		new.inuse = page->objects;
+		new.frozen = freelist != NULL;
+
+	} while (!cmpxchg_double_slab(s, page,
+		freelist, counters,
+		NULL, new.counters,
+		"get_freelist"));
+
+	return freelist;
+}
+
+/*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
  *
@@ -2144,8 +2178,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
 	void **object;
 	unsigned long flags;
-	struct page new;
-	unsigned long counters;
 
 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPT
@@ -2166,31 +2198,14 @@ redo:
 		goto new_slab;
 	}
 
-	stat(s, ALLOC_SLOWPATH);
-
-	do {
-		object = c->page->freelist;
-		counters = c->page->counters;
-		new.counters = counters;
-		VM_BUG_ON(!new.frozen);
-
-		/*
-		 * If there is no object left then we use this loop to
-		 * deactivate the slab which is simple since no objects
-		 * are left in the slab and therefore we do not need to
-		 * put the page back onto the partial list.
-		 *
-		 * If there are objects left then we retrieve them
-		 * and use them to refill the per cpu queue.
-		 */
+	/* must check again c->freelist in case of cpu migration or IRQ */
+	object = c->freelist;
+	if (object)
+		goto load_freelist;
 
-		new.inuse = c->page->objects;
-		new.frozen = object != NULL;
+	stat(s, ALLOC_SLOWPATH);
 
-	} while (!__cmpxchg_double_slab(s, c->page,
-			object, counters,
-			NULL, new.counters,
-			"__slab_alloc"));
+	object = get_freelist(s, c->page);
 
 	if (!object) {
 		c->page = NULL;
@@ -3028,7 +3043,9 @@ static int kmem_cache_open(struct kmem_cache *s,
 	 * per node list when we run out of per cpu objects. We only fetch 50%
 	 * to keep some capacity around for frees.
 	 */
-	if (s->size >= PAGE_SIZE)
+	if (kmem_cache_debug(s))
+		s->cpu_partial = 0;
+	else if (s->size >= PAGE_SIZE)
 		s->cpu_partial = 2;
 	else if (s->size >= 1024)
 		s->cpu_partial = 6;
@@ -4637,6 +4654,8 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 	err = strict_strtoul(buf, 10, &objects);
 	if (err)
 		return err;
+	if (objects && kmem_cache_debug(s))
+		return -EINVAL;
 
 	s->cpu_partial = objects;
 	flush_all(s);
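
A hedged userspace illustration of the cpu_partial_store() change above: after this patch, writing a non-zero value to the cpu_partial attribute of a cache with debugging enabled is expected to fail with EINVAL. The cache name and sysfs path below are examples only; actual names depend on the running kernel and its slub_debug configuration.

/* Example only: try to enable per cpu partial pages on a SLUB cache that
 * has debugging enabled; the write should fail with EINVAL. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/slab/kmalloc-64/cpu_partial", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "4", 1) < 0)
		printf("write failed: %s\n", strerror(errno));	/* expect EINVAL */
	close(fd);
	return 0;
}
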