 Documentation/kernel-parameters.txt |  6
 Documentation/vm/slub.txt           |  2
 mm/slab.c                           | 39
 mm/slub.c                           | 77
 4 files changed, 82 insertions(+), 42 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c92b1532f05a..a8d389d72405 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2395,6 +2395,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	slram=		[HW,MTD]
 
+	slab_max_order=	[MM, SLAB]
+			Determines the maximum allowed order for slabs.
+			A high setting may cause OOMs due to memory
+			fragmentation. Defaults to 1 for systems with
+			more than 32MB of RAM, 0 otherwise.
+
 	slub_debug[=options[,slabs]]	[MM, SLUB]
 			Enabling slub_debug allows one to determine the
 			culprit if slab objects become corrupted. Enabling
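
As a usage sketch (not part of the patch), the new parameter is passed on the kernel command line like any other boot option; the kernel image path and the other options below are only placeholders:

    linux /boot/vmlinuz root=/dev/sda1 ro slab_max_order=0
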
diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
index f464f47bc60d..2acdda9601b0 100644
--- a/Documentation/vm/slub.txt
+++ b/Documentation/vm/slub.txt
@@ -117,7 +117,7 @@ can be influenced by kernel parameters:
 
 slub_min_objects=x		(default 4)
 slub_min_order=x		(default 0)
-slub_max_order=x		(default 1)
+slub_max_order=x		(default 3 (PAGE_ALLOC_COSTLY_ORDER))
 
 slub_min_objects allows to specify how many objects must at least fit
 into one slab in order for the allocation order to be acceptable.
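
Higher orders matter because a larger slab packs more objects with less per-slab waste. A minimal user-space sketch of that relationship, assuming 4 KiB pages and ignoring the allocator's per-object metadata and alignment, is:

/*
 * Rough sketch of how the allocation order bounds objects per slab.
 * It only mirrors the general (PAGE_SIZE << order) / size relationship,
 * not SLUB's exact internal calculation.
 */
#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096UL	/* assumption: 4 KiB pages */

static unsigned long objs_per_slab(unsigned long size, unsigned int order)
{
	return (SKETCH_PAGE_SIZE << order) / size;
}

int main(void)
{
	/* 700-byte objects: order 0 fits 5, order 3 fits 46, so higher
	 * orders waste less space for awkward object sizes. */
	for (unsigned int order = 0; order <= 3; order++)
		printf("order %u: %lu objects\n",
		       order, objs_per_slab(700, order));
	return 0;
}
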
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -481,11 +481,13 @@ EXPORT_SYMBOL(slab_buffer_size);
 #endif
 
 /*
- * Do not go above this order unless 0 objects fit into the slab.
+ * Do not go above this order unless 0 objects fit into the slab or
+ * overridden on the command line.
  */
-#define BREAK_GFP_ORDER_HI	1
-#define BREAK_GFP_ORDER_LO	0
-static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+#define SLAB_MAX_ORDER_HI	1
+#define SLAB_MAX_ORDER_LO	0
+static int slab_max_order = SLAB_MAX_ORDER_LO;
+static bool slab_max_order_set __initdata;
 
 /*
  * Functions for storing/retrieving the cachep and or slab from the page
@@ -854,6 +856,17 @@ static int __init noaliencache_setup(char *s)
 }
 __setup("noaliencache", noaliencache_setup);
 
+static int __init slab_max_order_setup(char *str)
+{
+	get_option(&str, &slab_max_order);
+	slab_max_order = slab_max_order < 0 ? 0 :
+				min(slab_max_order, MAX_ORDER - 1);
+	slab_max_order_set = true;
+
+	return 1;
+}
+__setup("slab_max_order=", slab_max_order_setup);
+
 #ifdef CONFIG_NUMA
 /*
  * Special reaping functions for NUMA systems called from cache_reap().
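
The handler clamps whatever value the user passes into the range [0, MAX_ORDER - 1]. A stand-alone sketch of that clamping, with MAX_ORDER hard-coded to 11 as an assumed typical configuration value (not taken from this patch), behaves as follows:

/* Sketch of the clamping done by slab_max_order_setup(), in user space. */
#include <stdio.h>

#define SKETCH_MAX_ORDER 11	/* assumption: configuration dependent in the kernel */

static int clamp_order(int requested)
{
	if (requested < 0)
		return 0;
	return requested < SKETCH_MAX_ORDER - 1 ? requested : SKETCH_MAX_ORDER - 1;
}

int main(void)
{
	int inputs[] = { -1, 0, 2, 64 };

	/* prints 0, 0, 2 and 10 with the assumed MAX_ORDER of 11 */
	for (int i = 0; i < 4; i++)
		printf("slab_max_order=%d -> order %d\n",
		       inputs[i], clamp_order(inputs[i]));
	return 0;
}
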
@@ -1502,10 +1515,11 @@ void __init kmem_cache_init(void)
 
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
-	 * page orders on machines with more than 32MB of memory.
+	 * page orders on machines with more than 32MB of memory if
+	 * not overridden on the command line.
 	 */
-	if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
-		slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+	if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
+		slab_max_order = SLAB_MAX_ORDER_HI;
 
 	/* Bootstrap is tricky, because several objects are allocated
 	 * from caches that do not exist yet:
@@ -1932,8 +1946,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 			/* Print header */
 			if (lines == 0) {
 				printk(KERN_ERR
-					"Slab corruption: %s start=%p, len=%d\n",
-					cachep->name, realobj, size);
+					"Slab corruption (%s): %s start=%p, len=%d\n",
+					print_tainted(), cachep->name, realobj, size);
 				print_objinfo(cachep, objp, 0);
 			}
 			/* Hexdump the affected line */
@@ -2117,7 +2131,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 		 * Large number of objects is good, but very large slabs are
 		 * currently bad for the gfp()s.
 		 */
-		if (gfporder >= slab_break_gfp_order)
+		if (gfporder >= slab_max_order)
 			break;
 
 		/*
@@ -3042,8 +3056,9 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
 	if (entries != cachep->num - slabp->inuse) {
 bad:
 		printk(KERN_ERR "slab: Internal list corruption detected in "
-				"cache '%s'(%d), slabp %p(%d). Hexdump:\n",
-			cachep->name, cachep->num, slabp, slabp->inuse);
+				"cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
+			cachep->name, cachep->num, slabp, slabp->inuse,
+			print_tainted());
 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
 			sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
 			1);
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -570,7 +570,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	va_end(args);
 	printk(KERN_ERR "========================================"
 			"=====================================\n");
-	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
+	printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
 	printk(KERN_ERR "----------------------------------------"
 			"-------------------------------------\n\n");
 }
@@ -1901,11 +1901,14 @@ static void unfreeze_partials(struct kmem_cache *s)
 			}
 
 			if (l != m) {
-				if (l == M_PARTIAL)
+				if (l == M_PARTIAL) {
 					remove_partial(n, page);
-				else
+					stat(s, FREE_REMOVE_PARTIAL);
+				} else {
 					add_partial(n, page,
 						DEACTIVATE_TO_TAIL);
+					stat(s, FREE_ADD_PARTIAL);
+				}
 
 				l = m;
 			}
@@ -2124,6 +2127,37 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
 }
 
 /*
+ * Check the page->freelist of a page and either transfer the freelist to the
+ * per cpu freelist or deactivate the page.
+ *
+ * The page is still frozen if the return value is not NULL.
+ *
+ * If this function returns NULL then the page has been unfrozen.
+ */
+static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+{
+	struct page new;
+	unsigned long counters;
+	void *freelist;
+
+	do {
+		freelist = page->freelist;
+		counters = page->counters;
+		new.counters = counters;
+		VM_BUG_ON(!new.frozen);
+
+		new.inuse = page->objects;
+		new.frozen = freelist != NULL;
+
+	} while (!cmpxchg_double_slab(s, page,
+		freelist, counters,
+		NULL, new.counters,
+		"get_freelist"));
+
+	return freelist;
+}
+
+/*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
  *
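
get_freelist() follows the usual lock-free pattern: snapshot the current state, build the desired new state, and publish it with a single compare-and-swap, retrying if the snapshot went stale. A simplified user-space illustration of that pattern using C11 atomics (a plain pointer exchange rather than the kernel's cmpxchg_double_slab() on freelist and counters together) could look like:

/* User-space sketch of the snapshot/compare-and-swap retry loop. */
#include <stdatomic.h>
#include <stdio.h>

struct node {
	struct node *next;
};

static _Atomic(struct node *) list_head;	/* shared lock-free list */

/* Detach the whole list in one atomic step, retrying if it changed. */
static struct node *take_all(void)
{
	struct node *old = atomic_load(&list_head);

	/* On failure the CAS refreshes 'old' with the current head; retry. */
	while (!atomic_compare_exchange_weak(&list_head, &old, NULL))
		;
	return old;
}

int main(void)
{
	static struct node a, b;

	b.next = NULL;
	a.next = &b;
	atomic_store(&list_head, &a);

	printf("detached a %s list\n", take_all() ? "non-empty" : "empty");
	return 0;
}
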
@@ -2144,8 +2178,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
 	void **object;
 	unsigned long flags;
-	struct page new;
-	unsigned long counters;
 
 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPT
@@ -2166,31 +2198,14 @@ redo:
 		goto new_slab;
 	}
 
-	stat(s, ALLOC_SLOWPATH);
-
-	do {
-		object = c->page->freelist;
-		counters = c->page->counters;
-		new.counters = counters;
-		VM_BUG_ON(!new.frozen);
-
-		/*
-		 * If there is no object left then we use this loop to
-		 * deactivate the slab which is simple since no objects
-		 * are left in the slab and therefore we do not need to
-		 * put the page back onto the partial list.
-		 *
-		 * If there are objects left then we retrieve them
-		 * and use them to refill the per cpu queue.
-		 */
-
-		new.inuse = c->page->objects;
-		new.frozen = object != NULL;
-
-	} while (!__cmpxchg_double_slab(s, c->page,
-			object, counters,
-			NULL, new.counters,
-			"__slab_alloc"));
+	/* must check again c->freelist in case of cpu migration or IRQ */
+	object = c->freelist;
+	if (object)
+		goto load_freelist;
+
+	stat(s, ALLOC_SLOWPATH);
+
+	object = get_freelist(s, c->page);
 
 	if (!object) {
 		c->page = NULL;
@@ -3028,7 +3043,9 @@ static int kmem_cache_open(struct kmem_cache *s,
 	 * per node list when we run out of per cpu objects. We only fetch 50%
 	 * to keep some capacity around for frees.
 	 */
-	if (s->size >= PAGE_SIZE)
+	if (kmem_cache_debug(s))
+		s->cpu_partial = 0;
+	else if (s->size >= PAGE_SIZE)
 		s->cpu_partial = 2;
 	else if (s->size >= 1024)
 		s->cpu_partial = 6;
@@ -4637,6 +4654,8 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 	err = strict_strtoul(buf, 10, &objects);
 	if (err)
 		return err;
+	if (objects && kmem_cache_debug(s))
+		return -EINVAL;
 
 	s->cpu_partial = objects;
 	flush_all(s);
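
From user space this check surfaces through the existing cpu_partial sysfs attribute: writing a non-zero value to a cache that has debugging enabled now fails with EINVAL. A rough sketch of exercising it; the cache name "kmalloc-64" is only an example, and the EINVAL outcome assumes that cache was booted with slub_debug:

/* Sketch: write the cpu_partial attribute and report the outcome. */
#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *path = "/sys/kernel/slab/kmalloc-64/cpu_partial";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/* The value reaches the kernel no later than fclose(); check both steps. */
	if (fprintf(f, "8\n") < 0 || fclose(f) != 0) {
		fprintf(stderr, "writing %s failed: %s\n", path, strerror(errno));
		return 1;
	}
	printf("cpu_partial set to 8\n");
	return 0;
}
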