author    Vladimir Davydov <vdavydov@parallels.com>    2015-02-12 17:59:41 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2015-02-12 21:54:10 -0500
commit    832f37f5d5f5c7281880c21eb09508750b67f540
tree      9d0102615b2169659d156f633ba7a304ad27e590
parent    2788cf0c401c268b4819c5407493a8769b7007aa
slub: never fail to shrink cache
SLUB's version of __kmem_cache_shrink() not only removes empty slabs, but also tries to rearrange the partial lists to place the slabs filled up most at the head to cope with fragmentation. To achieve that, it allocates a temporary array of lists used to sort slabs by the number of objects in use. If the allocation fails, the whole procedure is aborted.

This is unacceptable for the kernel memory accounting extension of the memory cgroup, where we want to make sure that kmem_cache_shrink() successfully discarded empty slabs. Although the allocation failure is utterly unlikely with the current page allocator implementation, which retries GFP_KERNEL allocations of order <= 2 infinitely, it is better not to rely on that.

This patch therefore makes __kmem_cache_shrink() allocate the array on the stack instead of calling kmalloc, which may fail. The array size is chosen to be equal to 32, because most SLUB caches store no more than 32 objects per slab page. Slab pages with <= 32 free objects are sorted using the array by the number of objects in use and promoted to the head of the partial list, while slab pages with > 32 free objects are left at the end of the list without any ordering imposed on them.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
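For readers who want to see the bucketing policy in isolation, below is a minimal, self-contained user-space sketch of the idea the patch implements with on-stack list_head buckets. The names and types here (fake_slab, shrink_partial, a singly linked list instead of the kernel's list_head) are hypothetical stand-ins, not kernel code: empty slabs are dropped, slabs with at most SHRINK_PROMOTE_MAX free objects are bucketed by free count and spliced back fullest-first, and sparser slabs keep their relative order at the tail.

#include <stdio.h>

#define SHRINK_PROMOTE_MAX 32

/* Hypothetical stand-in for a partial slab page. */
struct fake_slab {
        int objects;            /* total objects in the slab   */
        int inuse;              /* objects currently allocated */
        struct fake_slab *next; /* singly linked partial list  */
};

/*
 * Rebuild a partial list the way the patch does: drop empty slabs,
 * bucket slabs with <= SHRINK_PROMOTE_MAX free objects by their free
 * count, splice the buckets back fullest-first, and leave sparser
 * slabs unordered at the tail.
 */
static struct fake_slab *shrink_partial(struct fake_slab *partial)
{
        struct fake_slab *promote[SHRINK_PROMOTE_MAX] = { 0 };
        struct fake_slab *tail[SHRINK_PROMOTE_MAX] = { 0 };
        struct fake_slab *rest = NULL, *rest_tail = NULL;
        struct fake_slab *slab, *next;
        int i;

        for (slab = partial; slab; slab = next) {
                int free = slab->objects - slab->inuse;

                next = slab->next;
                slab->next = NULL;

                if (free <= 0)
                        continue;       /* full slabs never sit on the partial list */
                if (free == slab->objects)
                        continue;       /* empty slab: would be discarded */

                if (free <= SHRINK_PROMOTE_MAX) {
                        /* append to the bucket for this free count */
                        if (tail[free - 1])
                                tail[free - 1]->next = slab;
                        else
                                promote[free - 1] = slab;
                        tail[free - 1] = slab;
                } else {
                        /* too sparse: keep at the tail, order unchanged */
                        if (rest_tail)
                                rest_tail->next = slab;
                        else
                                rest = slab;
                        rest_tail = slab;
                }
        }

        /* Splice buckets back so the fullest slabs (fewest free) come first. */
        for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) {
                if (!promote[i])
                        continue;
                tail[i]->next = rest;
                rest = promote[i];
        }
        return rest;
}

int main(void)
{
        struct fake_slab slabs[] = {
                { 32, 31, NULL },       /*  1 free: ends up first        */
                { 32,  0, NULL },       /* empty: dropped                */
                { 32,  4, NULL },       /* 28 free: promoted after ...   */
                { 32, 20, NULL },       /* 12 free: ... this one         */
        };
        struct fake_slab *head = NULL, *p;
        int i;

        for (i = 3; i >= 0; i--) {
                slabs[i].next = head;
                head = &slabs[i];
        }
        for (p = shrink_partial(head); p; p = p->next)
                printf("inuse=%2d free=%2d\n", p->inuse, p->objects - p->inuse);
        return 0;
}

The sketch prints the surviving slabs ordered by fewest free objects first, mirroring how the patch makes new allocations hit the fullest slabs and lets the sparsest ones drain.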
-rw-r--r--    mm/slub.c    58
1 file changed, 31 insertions(+), 27 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 1e5a4636cb23..d97b692165d2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3358,11 +3358,12 @@ void kfree(const void *x)
 }
 EXPORT_SYMBOL(kfree);
 
+#define SHRINK_PROMOTE_MAX 32
+
 /*
- * kmem_cache_shrink removes empty slabs from the partial lists and sorts
- * the remaining slabs by the number of items in use. The slabs with the
- * most items in use come first. New allocations will then fill those up
- * and thus they can be removed from the partial lists.
+ * kmem_cache_shrink discards empty slabs and promotes the slabs filled
+ * up most to the head of the partial lists. New allocations will then
+ * fill those up and thus they can be removed from the partial lists.
  *
  * The slabs with the least items are placed last. This results in them
  * being allocated from last increasing the chance that the last objects
@@ -3375,51 +3376,57 @@ int __kmem_cache_shrink(struct kmem_cache *s)
 	struct kmem_cache_node *n;
 	struct page *page;
 	struct page *t;
-	int objects = oo_objects(s->max);
-	struct list_head *slabs_by_inuse =
-		kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
+	struct list_head discard;
+	struct list_head promote[SHRINK_PROMOTE_MAX];
 	unsigned long flags;
 
-	if (!slabs_by_inuse)
-		return -ENOMEM;
-
 	flush_all(s);
 	for_each_kmem_cache_node(s, node, n) {
 		if (!n->nr_partial)
 			continue;
 
-		for (i = 0; i < objects; i++)
-			INIT_LIST_HEAD(slabs_by_inuse + i);
+		INIT_LIST_HEAD(&discard);
+		for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
+			INIT_LIST_HEAD(promote + i);
 
 		spin_lock_irqsave(&n->list_lock, flags);
 
 		/*
-		 * Build lists indexed by the items in use in each slab.
+		 * Build lists of slabs to discard or promote.
 		 *
 		 * Note that concurrent frees may occur while we hold the
 		 * list_lock. page->inuse here is the upper limit.
 		 */
 		list_for_each_entry_safe(page, t, &n->partial, lru) {
-			list_move(&page->lru, slabs_by_inuse + page->inuse);
-			if (!page->inuse)
+			int free = page->objects - page->inuse;
+
+			/* Do not reread page->inuse */
+			barrier();
+
+			/* We do not keep full slabs on the list */
+			BUG_ON(free <= 0);
+
+			if (free == page->objects) {
+				list_move(&page->lru, &discard);
 				n->nr_partial--;
+			} else if (free <= SHRINK_PROMOTE_MAX)
+				list_move(&page->lru, promote + free - 1);
 		}
 
 		/*
-		 * Rebuild the partial list with the slabs filled up most
-		 * first and the least used slabs at the end.
+		 * Promote the slabs filled up most to the head of the
+		 * partial list.
 		 */
-		for (i = objects - 1; i > 0; i--)
-			list_splice(slabs_by_inuse + i, n->partial.prev);
+		for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
+			list_splice(promote + i, &n->partial);
 
 		spin_unlock_irqrestore(&n->list_lock, flags);
 
 		/* Release empty slabs */
-		list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
+		list_for_each_entry_safe(page, t, &discard, lru)
 			discard_slab(s, page);
 	}
 
-	kfree(slabs_by_inuse);
 	return 0;
 }
 
@@ -4686,12 +4693,9 @@ static ssize_t shrink_show(struct kmem_cache *s, char *buf)
 static ssize_t shrink_store(struct kmem_cache *s,
 			const char *buf, size_t length)
 {
-	if (buf[0] == '1') {
-		int rc = kmem_cache_shrink(s);
-
-		if (rc)
-			return rc;
-	} else
+	if (buf[0] == '1')
+		kmem_cache_shrink(s);
+	else
 		return -EINVAL;
 	return length;
 }
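Usage note (illustrative, not part of the patch): SLUB exposes the attribute handled by shrink_store() as /sys/kernel/slab/<cache>/shrink, and with this change a write of "1" no longer has an allocation-failure path. A minimal sketch of triggering it from user space follows; the cache name "kmalloc-64" is only an example and may differ on a given kernel, and the write normally requires root.

#include <stdio.h>

int main(void)
{
        /* Example cache name; pick any directory under /sys/kernel/slab/. */
        const char *path = "/sys/kernel/slab/kmalloc-64/shrink";
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                return 1;
        }
        /* Writing '1' reaches shrink_store() -> kmem_cache_shrink(). */
        if (fputs("1", f) == EOF || fclose(f) != 0) {
                perror(path);
                return 1;
        }
        return 0;
}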