author	Christoph Lameter <clameter@sgi.com>	2007-05-06 17:49:46 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-05-07 15:12:54 -0400
commit	2086d26a05a4b5bda4a2f677bc143933bbdfa9f8 (patch)
tree	7c07b8319f80119066d9dbd0c1a0910c94a1259c
parent	88a420e4e21c1ff6592a668cf4e8af42eff30bad (diff)
SLUB: Free slabs and sort partial slab lists in kmem_cache_shrink
At kmem_cache_shrink, check whether there are any empty slabs on the partial lists; if so, remove them. Also, as an anti-fragmentation measure, sort the partial slabs so that the most fully allocated ones come first and the least allocated ones last. The next allocations may fill up the nearly full slabs. Having the least allocated slabs last gives them the maximum chance that their remaining objects will be freed. Thus we can hopefully minimize the number of partial slabs.

I think this is the best one can do in terms of anti-fragmentation measures. Real defragmentation (meaning moving objects out of the slabs with the fewest objects in use into those that are almost full) could be implemented by reverse-scanning through the list produced here, but that would require a callback at slab cache creation that allows the deletion or moving of an object. This will involve slab API changes, so defer for now.

Cc: Mel Gorman <mel@skynet.ie>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	mm/slub.c	125
1 file changed, 112 insertions(+), 13 deletions(-)
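Editorial note: the commit message describes a one-pass bucket sort in which partial slabs are distributed into per-inuse-count lists, empty slabs are discarded, and the lists are spliced back so the fullest slabs lead the partial list. The standalone C sketch below is only an illustration of that idea under simplified assumptions; the fake_slab type, OBJECTS_PER_SLAB constant and shrink_partial_list() helper are hypothetical, while the real implementation in the diff that follows operates on struct page, struct list_head and the per-node list_lock.

/*
 * Editorial sketch, not part of the patch: a userspace model of the
 * bucket sort performed by kmem_cache_shrink below.
 */
#include <stdio.h>
#include <stdlib.h>

#define OBJECTS_PER_SLAB 8		/* stand-in for s->objects */

struct fake_slab {			/* stand-in for a partial slab */
	int inuse;			/* objects currently allocated */
	struct fake_slab *next;
};

/* Free empty slabs, return the rest ordered with the fullest first. */
static struct fake_slab *shrink_partial_list(struct fake_slab *partial)
{
	struct fake_slab *buckets[OBJECTS_PER_SLAB] = { NULL };
	struct fake_slab *slab, *next, *sorted = NULL;

	/* Pass 1: discard empty slabs, bucket the rest by inuse count. */
	for (slab = partial; slab; slab = next) {
		next = slab->next;
		if (slab->inuse == 0) {
			free(slab);		/* like discard_slab() */
			continue;
		}
		slab->next = buckets[slab->inuse];
		buckets[slab->inuse] = slab;
	}

	/*
	 * Pass 2: rebuild the list. Walking the buckets from the least
	 * used upward and prepending leaves the fullest slabs at the
	 * head and the emptiest at the tail, mirroring the reverse
	 * list_splice() loop in the kernel function.
	 */
	for (int i = 1; i < OBJECTS_PER_SLAB; i++) {
		for (slab = buckets[i]; slab; slab = next) {
			next = slab->next;
			slab->next = sorted;
			sorted = slab;
		}
	}
	return sorted;
}

int main(void)
{
	int inuse[] = { 3, 0, 7, 1, 0, 5 };	/* sample partial list */
	struct fake_slab *list = NULL, *slab;

	for (unsigned i = 0; i < sizeof(inuse) / sizeof(inuse[0]); i++) {
		slab = malloc(sizeof(*slab));
		slab->inuse = inuse[i];
		slab->next = list;
		list = slab;
	}

	list = shrink_partial_list(list);
	for (slab = list; slab; slab = slab->next)
		printf("slab with %d objects in use\n", slab->inuse);
	/* Prints 7, 5, 3, 1: empty slabs freed, fullest slabs first. */

	while (list) {
		slab = list;
		list = list->next;
		free(slab);
	}
	return 0;
}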
diff --git a/mm/slub.c b/mm/slub.c
index a6231963cae5..ed2846240f96 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -130,9 +130,19 @@
  */
 #define SLUB_UNIMPLEMENTED (SLAB_DEBUG_INITIAL)
 
-/* Mininum number of partial slabs */
+/*
+ * Minimum number of partial slabs. These will be left on the partial
+ * lists even if they are empty. kmem_cache_shrink may reclaim them.
+ */
 #define MIN_PARTIAL 2
 
+/*
+ * Maximum number of desirable partial slabs.
+ * The existence of more partial slabs makes kmem_cache_shrink
+ * sort the partial list by the number of objects in use.
+ */
+#define MAX_PARTIAL 10
+
 #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
 				SLAB_POISON | SLAB_STORE_USER)
 /*
@@ -1882,7 +1892,7 @@ static int kmem_cache_close(struct kmem_cache *s)
 	for_each_online_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
-		free_list(s, n, &n->partial);
+		n->nr_partial -= free_list(s, n, &n->partial);
 		if (atomic_long_read(&n->nr_slabs))
 			return 1;
 	}
@@ -2130,6 +2140,86 @@ void kfree(const void *x)
 }
 EXPORT_SYMBOL(kfree);
 
+/*
+ * kmem_cache_shrink removes empty slabs from the partial lists
+ * and then sorts the partially allocated slabs by the number
+ * of items in use. The slabs with the most items in use
+ * come first. New allocations will remove these from the
+ * partial list because they are full. The slabs with the
+ * least items are placed last. If it happens that the objects
+ * are freed then the page can be returned to the page allocator.
+ */
+int kmem_cache_shrink(struct kmem_cache *s)
+{
+	int node;
+	int i;
+	struct kmem_cache_node *n;
+	struct page *page;
+	struct page *t;
+	struct list_head *slabs_by_inuse =
+		kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
+	unsigned long flags;
+
+	if (!slabs_by_inuse)
+		return -ENOMEM;
+
+	flush_all(s);
+	for_each_online_node(node) {
+		n = get_node(s, node);
+
+		if (!n->nr_partial)
+			continue;
+
+		for (i = 0; i < s->objects; i++)
+			INIT_LIST_HEAD(slabs_by_inuse + i);
+
+		spin_lock_irqsave(&n->list_lock, flags);
+
+		/*
+		 * Build lists indexed by the items in use in
+		 * each slab or free slabs if empty.
+		 *
+		 * Note that concurrent frees may occur while
+		 * we hold the list_lock. page->inuse here is
+		 * the upper limit.
+		 */
+		list_for_each_entry_safe(page, t, &n->partial, lru) {
+			if (!page->inuse && slab_trylock(page)) {
+				/*
+				 * Must hold slab lock here because slab_free
+				 * may have freed the last object and be
+				 * waiting to release the slab.
+				 */
+				list_del(&page->lru);
+				n->nr_partial--;
+				slab_unlock(page);
+				discard_slab(s, page);
+			} else {
+				if (n->nr_partial > MAX_PARTIAL)
+					list_move(&page->lru,
+						slabs_by_inuse + page->inuse);
+			}
+		}
+
+		if (n->nr_partial <= MAX_PARTIAL)
+			goto out;
+
+		/*
+		 * Rebuild the partial list with the slabs filled up
+		 * most first and the least used slabs at the end.
+		 */
+		for (i = s->objects - 1; i >= 0; i--)
+			list_splice(slabs_by_inuse + i, n->partial.prev);
+
+	out:
+		spin_unlock_irqrestore(&n->list_lock, flags);
+	}
+
+	kfree(slabs_by_inuse);
+	return 0;
+}
+EXPORT_SYMBOL(kmem_cache_shrink);
+
 /**
  * krealloc - reallocate memory. The contents will remain unchanged.
  *
@@ -2382,17 +2472,6 @@ static struct notifier_block __cpuinitdata slab_notifier =
 
 #endif
 
-/***************************************************************
- * Compatiblility definitions
- **************************************************************/
-
-int kmem_cache_shrink(struct kmem_cache *s)
-{
-	flush_all(s);
-	return 0;
-}
-EXPORT_SYMBOL(kmem_cache_shrink);
-
 #ifdef CONFIG_NUMA
 
 /*****************************************************************
@@ -3169,6 +3248,25 @@ static ssize_t validate_store(struct kmem_cache *s,
 }
 SLAB_ATTR(validate);
 
+static ssize_t shrink_show(struct kmem_cache *s, char *buf)
+{
+	return 0;
+}
+
+static ssize_t shrink_store(struct kmem_cache *s,
+			const char *buf, size_t length)
+{
+	if (buf[0] == '1') {
+		int rc = kmem_cache_shrink(s);
+
+		if (rc)
+			return rc;
+	} else
+		return -EINVAL;
+	return length;
+}
+SLAB_ATTR(shrink);
+
 static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
 {
 	if (!(s->flags & SLAB_STORE_USER))
@@ -3225,6 +3323,7 @@ static struct attribute * slab_attrs[] = {
 	&poison_attr.attr,
 	&store_user_attr.attr,
 	&validate_attr.attr,
+	&shrink_attr.attr,
 	&alloc_calls_attr.attr,
 	&free_calls_attr.attr,
 #ifdef CONFIG_ZONE_DMA
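
Editorial note: the shrink attribute added above lets userspace request kmem_cache_shrink() for an individual cache by writing "1" to its sysfs file. The C sketch below shows one way to do that; the /sys/slab/ path and the shrink_cache() helper are assumptions for illustration, not something this patch defines, so adjust the path to wherever slab caches are exposed on the running kernel.

/*
 * Editorial sketch: trigger the new "shrink" attribute from userspace.
 * The sysfs path below is an assumption about where SLUB exposes its
 * caches; it is not part of this patch.
 */
#include <stdio.h>

static int shrink_cache(const char *cache_name)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/slab/%s/shrink", cache_name);
	f = fopen(path, "w");
	if (!f)
		return -1;
	/* shrink_store() accepts "1" and returns -EINVAL otherwise. */
	if (fputs("1", f) == EOF) {
		fclose(f);
		return -1;
	}
	return fclose(f);
}

int main(void)
{
	/* "dentry" is just an example cache name. */
	return shrink_cache("dentry") ? 1 : 0;
}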