Diffstat (limited to 'mm/slab.c')
-rw-r--r--  mm/slab.c  620
1 file changed, 354 insertions(+), 266 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 98ac20bc0de9..3dbd6f4e7477 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -89,6 +89,7 @@
 #include	<linux/config.h>
 #include	<linux/slab.h>
 #include	<linux/mm.h>
+#include	<linux/poison.h>
 #include	<linux/swap.h>
 #include	<linux/cache.h>
 #include	<linux/interrupt.h>
@@ -106,6 +107,7 @@
 #include	<linux/nodemask.h>
 #include	<linux/mempolicy.h>
 #include	<linux/mutex.h>
+#include	<linux/rtmutex.h>
 
 #include	<asm/uaccess.h>
 #include	<asm/cacheflush.h>
@@ -307,6 +309,13 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
 #define SIZE_AC 1
 #define SIZE_L3 (1 + MAX_NUMNODES)
 
+static int drain_freelist(struct kmem_cache *cache,
+			struct kmem_list3 *l3, int tofree);
+static void free_block(struct kmem_cache *cachep, void **objpp, int len,
+			int node);
+static int enable_cpucache(struct kmem_cache *cachep);
+static void cache_reap(void *unused);
+
 /*
  * This function must be completely optimized away if a constant is passed to
  * it. Mostly the same as what is in linux/slab.h except it returns an index.
@@ -454,7 +463,7 @@ struct kmem_cache {
 #define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
 #define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
 #define	STATS_INC_GROWN(x)	((x)->grown++)
-#define	STATS_INC_REAPED(x)	((x)->reaped++)
+#define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
 #define	STATS_SET_HIGH(x)	\
 	do { \
 		if ((x)->num_active > (x)->high_mark) \
@@ -478,7 +487,7 @@ struct kmem_cache {
 #define	STATS_DEC_ACTIVE(x)	do { } while (0)
 #define	STATS_INC_ALLOCED(x)	do { } while (0)
 #define	STATS_INC_GROWN(x)	do { } while (0)
-#define	STATS_INC_REAPED(x)	do { } while (0)
+#define	STATS_ADD_REAPED(x,y)	do { } while (0)
 #define	STATS_SET_HIGH(x)	do { } while (0)
 #define	STATS_INC_ERR(x)	do { } while (0)
 #define	STATS_INC_NODEALLOCS(x)	do { } while (0)
@@ -492,17 +501,6 @@ struct kmem_cache {
 #endif
 
 #if DEBUG
-/*
- * Magic nums for obj red zoning.
- * Placed in the first word before and the first word after an obj.
- */
-#define	RED_INACTIVE	0x5A2CF071UL	/* when obj is inactive */
-#define	RED_ACTIVE	0x170FC2A5UL	/* when obj is active */
-
-/* ...and for poisoning */
-#define	POISON_INUSE	0x5a	/* for use-uninitialised poisoning */
-#define	POISON_FREE	0x6b	/* for use-after-free poisoning */
-#define	POISON_END	0xa5	/* end-byte of poisoning */
 
 /*
  * memory layout of objects:
@@ -676,17 +674,66 @@ static struct kmem_cache cache_cache = {
 #endif
 };
 
-/* Guard access to the cache-chain. */
-static DEFINE_MUTEX(cache_chain_mutex);
-static struct list_head cache_chain;
+#define BAD_ALIEN_MAGIC 0x01020304ul
+
+#ifdef CONFIG_LOCKDEP
 
 /*
- * vm_enough_memory() looks at this to determine how many slab-allocated pages
- * are possibly freeable under pressure
+ * Slab sometimes uses the kmalloc slabs to store the slab headers
+ * for other slabs "off slab".
+ * The locking for this is tricky in that it nests within the locks
+ * of all other slabs in a few places; to deal with this special
+ * locking we put on-slab caches into a separate lock-class.
  *
- * SLAB_RECLAIM_ACCOUNT turns this on per-slab
+ * We set lock class for alien array caches which are up during init.
+ * The lock annotation will be lost if all cpus of a node goes down and
+ * then comes back up during hotplug
  */
-atomic_t slab_reclaim_pages;
+static struct lock_class_key on_slab_l3_key;
+static struct lock_class_key on_slab_alc_key;
+
+static inline void init_lock_keys(void)
+
+{
+	int q;
+	struct cache_sizes *s = malloc_sizes;
+
+	while (s->cs_size != ULONG_MAX) {
+		for_each_node(q) {
+			struct array_cache **alc;
+			int r;
+			struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
+			if (!l3 || OFF_SLAB(s->cs_cachep))
+				continue;
+			lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
+			alc = l3->alien;
+			/*
+			 * FIXME: This check for BAD_ALIEN_MAGIC
+			 * should go away when common slab code is taught to
+			 * work even without alien caches.
+			 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
+			 * for alloc_alien_cache,
+			 */
+			if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+				continue;
+			for_each_node(r) {
+				if (alc[r])
+					lockdep_set_class(&alc[r]->lock,
+					     &on_slab_alc_key);
+			}
+		}
+		s++;
+	}
+}
+#else
+static inline void init_lock_keys(void)
+{
+}
+#endif
+
+/* Guard access to the cache-chain. */
+static DEFINE_MUTEX(cache_chain_mutex);
+static struct list_head cache_chain;
 
 /*
  * chicken and egg problem: delay the per-cpu array allocation
@@ -709,12 +756,6 @@ int slab_is_available(void)
 
 static DEFINE_PER_CPU(struct work_struct, reap_work);
 
-static void free_block(struct kmem_cache *cachep, void **objpp, int len,
-			int node);
-static void enable_cpucache(struct kmem_cache *cachep);
-static void cache_reap(void *unused);
-static int __node_shrink(struct kmem_cache *cachep, int node);
-
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
 	return cachep->array[smp_processor_id()];
@@ -745,11 +786,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size,
 	return csizep->cs_cachep;
 }
 
-struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
+static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
 {
 	return __find_general_cachep(size, gfpflags);
 }
-EXPORT_SYMBOL(kmem_find_general_cachep);
 
 static size_t slab_mgmt_size(size_t nr_objs, size_t align)
 {
@@ -932,7 +972,39 @@ static int transfer_objects(struct array_cache *to,
 	return nr;
 }
 
-#ifdef CONFIG_NUMA
+#ifndef CONFIG_NUMA
+
+#define drain_alien_cache(cachep, alien) do { } while (0)
+#define reap_alien(cachep, l3) do { } while (0)
+
+static inline struct array_cache **alloc_alien_cache(int node, int limit)
+{
+	return (struct array_cache **)BAD_ALIEN_MAGIC;
+}
+
+static inline void free_alien_cache(struct array_cache **ac_ptr)
+{
+}
+
+static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
+{
+	return 0;
+}
+
+static inline void *alternate_node_alloc(struct kmem_cache *cachep,
+		gfp_t flags)
+{
+	return NULL;
+}
+
+static inline void *__cache_alloc_node(struct kmem_cache *cachep,
+		gfp_t flags, int nodeid)
+{
+	return NULL;
+}
+
+#else	/* CONFIG_NUMA */
+
 static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
@@ -1061,29 +1133,9 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 	}
 	return 1;
 }
-
-#else
-
-#define drain_alien_cache(cachep, alien) do { } while (0)
-#define reap_alien(cachep, l3) do { } while (0)
-
-static inline struct array_cache **alloc_alien_cache(int node, int limit)
-{
-	return (struct array_cache **) 0x01020304ul;
-}
-
-static inline void free_alien_cache(struct array_cache **ac_ptr)
-{
-}
-
-static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
-{
-	return 0;
-}
-
 #endif
 
-static int cpuup_callback(struct notifier_block *nfb,
+static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 				unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -1250,10 +1302,7 @@ free_array_cache:
 		l3 = cachep->nodelists[node];
 		if (!l3)
 			continue;
-		spin_lock_irq(&l3->list_lock);
-		/* free slabs belonging to this node */
-		__node_shrink(cachep, node);
-		spin_unlock_irq(&l3->list_lock);
+		drain_freelist(cachep, l3, l3->free_objects);
 	}
 	mutex_unlock(&cache_chain_mutex);
 	break;
@@ -1265,7 +1314,9 @@ bad:
 	return NOTIFY_BAD;
 }
 
-static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
+static struct notifier_block __cpuinitdata cpucache_notifier = {
+	&cpuup_callback, NULL, 0
+};
 
 /*
  * swap the static kmem_list3 with kmalloced memory
@@ -1281,6 +1332,11 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
 
 	local_irq_disable();
 	memcpy(ptr, list, sizeof(struct kmem_list3));
+	/*
+	 * Do not assume that spinlocks can be initialized via memcpy:
+	 */
+	spin_lock_init(&ptr->list_lock);
+
 	MAKE_ALL_LISTS(cachep, ptr, nodeid);
 	cachep->nodelists[nodeid] = ptr;
 	local_irq_enable();
@@ -1407,7 +1463,7 @@ void __init kmem_cache_init(void)
 	}
 	/* 4) Replace the bootstrap head arrays */
 	{
-		void *ptr;
+		struct array_cache *ptr;
 
 		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
 
@@ -1415,6 +1471,11 @@ void __init kmem_cache_init(void)
 		BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
 		memcpy(ptr, cpu_cache_get(&cache_cache),
 		       sizeof(struct arraycache_init));
+		/*
+		 * Do not assume that spinlocks can be initialized via memcpy:
+		 */
+		spin_lock_init(&ptr->lock);
+
 		cache_cache.array[smp_processor_id()] = ptr;
 		local_irq_enable();
 
@@ -1425,6 +1486,11 @@ void __init kmem_cache_init(void)
 		       != &initarray_generic.cache);
 		memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
 		       sizeof(struct arraycache_init));
+		/*
+		 * Do not assume that spinlocks can be initialized via memcpy:
+		 */
+		spin_lock_init(&ptr->lock);
+
 		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
 		    ptr;
 		local_irq_enable();
@@ -1453,10 +1519,15 @@ void __init kmem_cache_init(void)
 		struct kmem_cache *cachep;
 		mutex_lock(&cache_chain_mutex);
 		list_for_each_entry(cachep, &cache_chain, next)
-			enable_cpucache(cachep);
+			if (enable_cpucache(cachep))
+				BUG();
 		mutex_unlock(&cache_chain_mutex);
 	}
 
+	/* Annotate slab for lockdep -- annotate the malloc caches */
+	init_lock_keys();
+
+
 	/* Done! */
 	g_cpucache_up = FULL;
 
@@ -1505,7 +1576,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	 */
 	flags |= __GFP_COMP;
 #endif
-	flags |= cachep->gfpflags;
+
+	/*
+	 * Under NUMA we want memory on the indicated node. We will handle
+	 * the needed fallback ourselves since we want to serve from our
+	 * per node object lists first for other nodes.
+	 */
+	flags |= cachep->gfpflags | GFP_THISNODE;
 
 	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
 	if (!page)
@@ -1513,8 +1590,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-		atomic_add(nr_pages, &slab_reclaim_pages);
-	add_page_state(nr_slab, nr_pages);
+		add_zone_page_state(page_zone(page),
+			NR_SLAB_RECLAIMABLE, nr_pages);
+	else
+		add_zone_page_state(page_zone(page),
+			NR_SLAB_UNRECLAIMABLE, nr_pages);
 	for (i = 0; i < nr_pages; i++)
 		__SetPageSlab(page + i);
 	return page_address(page);
@@ -1529,17 +1609,20 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	struct page *page = virt_to_page(addr);
 	const unsigned long nr_freed = i;
 
+	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+		sub_zone_page_state(page_zone(page),
+				NR_SLAB_RECLAIMABLE, nr_freed);
+	else
+		sub_zone_page_state(page_zone(page),
+				NR_SLAB_UNRECLAIMABLE, nr_freed);
 	while (i--) {
 		BUG_ON(!PageSlab(page));
 		__ClearPageSlab(page);
 		page++;
 	}
-	sub_page_state(nr_slab, nr_freed);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
 	free_pages((unsigned long)addr, cachep->gfporder);
-	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-		atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
@@ -1600,10 +1683,32 @@ static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
 static void dump_line(char *data, int offset, int limit)
 {
 	int i;
+	unsigned char error = 0;
+	int bad_count = 0;
+
 	printk(KERN_ERR "%03x:", offset);
-	for (i = 0; i < limit; i++)
+	for (i = 0; i < limit; i++) {
+		if (data[offset + i] != POISON_FREE) {
+			error = data[offset + i];
+			bad_count++;
+		}
 		printk(" %02x", (unsigned char)data[offset + i]);
+	}
 	printk("\n");
+
+	if (bad_count == 1) {
+		error ^= POISON_FREE;
+		if (!(error & (error - 1))) {
+			printk(KERN_ERR "Single bit error detected. Probably "
+					"bad RAM.\n");
+#ifdef CONFIG_X86
+			printk(KERN_ERR "Run memtest86+ or a similar memory "
+					"test tool.\n");
+#else
+			printk(KERN_ERR "Run a memory test tool.\n");
+#endif
+		}
+	}
 }
 #endif
 
@@ -1796,6 +1901,27 @@ static void set_up_list3s(struct kmem_cache *cachep, int index)
 	}
 }
 
+static void __kmem_cache_destroy(struct kmem_cache *cachep)
+{
+	int i;
+	struct kmem_list3 *l3;
+
+	for_each_online_cpu(i)
+	    kfree(cachep->array[i]);
+
+	/* NUMA: free the list3 structures */
+	for_each_online_node(i) {
+		l3 = cachep->nodelists[i];
+		if (l3) {
+			kfree(l3->shared);
+			free_alien_cache(l3->alien);
+			kfree(l3);
+		}
+	}
+	kmem_cache_free(&cache_cache, cachep);
+}
+
+
 /**
  * calculate_slab_order - calculate size (page order) of slabs
  * @cachep: pointer to the cache that is being created
@@ -1866,12 +1992,11 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 	return left_over;
 }
 
-static void setup_cpu_cache(struct kmem_cache *cachep)
+static int setup_cpu_cache(struct kmem_cache *cachep)
 {
-	if (g_cpucache_up == FULL) {
-		enable_cpucache(cachep);
-		return;
-	}
+	if (g_cpucache_up == FULL)
+		return enable_cpucache(cachep);
+
 	if (g_cpucache_up == NONE) {
 		/*
 		 * Note: the first kmem_cache_create must create the cache
@@ -1918,6 +2043,7 @@ static void setup_cpu_cache(struct kmem_cache *cachep)
 	cpu_cache_get(cachep)->touched = 0;
 	cachep->batchcount = 1;
 	cachep->limit = BOOT_CPUCACHE_ENTRIES;
+	return 0;
 }
 
 /**
@@ -2059,6 +2185,15 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	} else {
 		ralign = BYTES_PER_WORD;
 	}
+
+	/*
+	 * Redzoning and user store require word alignment. Note this will be
+	 * overridden by architecture or caller mandated alignment if either
+	 * is greater than BYTES_PER_WORD.
+	 */
+	if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
+		ralign = BYTES_PER_WORD;
+
 	/* 2) arch mandated alignment: disables debug if necessary */
 	if (ralign < ARCH_SLAB_MINALIGN) {
 		ralign = ARCH_SLAB_MINALIGN;
@@ -2072,8 +2207,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
 	}
 	/*
-	 * 4) Store it. Note that the debug code below can reduce
-	 * the alignment to BYTES_PER_WORD.
+	 * 4) Store it.
 	 */
 	align = ralign;
 
@@ -2085,20 +2219,19 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 #if DEBUG
 	cachep->obj_size = size;
 
+	/*
+	 * Both debugging options require word-alignment which is calculated
+	 * into align above.
+	 */
 	if (flags & SLAB_RED_ZONE) {
-		/* redzoning only works with word aligned caches */
-		align = BYTES_PER_WORD;
-
 		/* add space for red zone words */
 		cachep->obj_offset += BYTES_PER_WORD;
 		size += 2 * BYTES_PER_WORD;
 	}
 	if (flags & SLAB_STORE_USER) {
-		/* user store requires word alignment and
-		 * one word storage behind the end of the real
-		 * object.
+		/* user store requires one word storage behind the end of
+		 * the real object.
 		 */
-		align = BYTES_PER_WORD;
 		size += BYTES_PER_WORD;
 	}
 #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
@@ -2162,14 +2295,26 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		cachep->gfpflags |= GFP_DMA;
 	cachep->buffer_size = size;
 
-	if (flags & CFLGS_OFF_SLAB)
+	if (flags & CFLGS_OFF_SLAB) {
 		cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
+		/*
+		 * This is a possibility for one of the malloc_sizes caches.
+		 * But since we go off slab only for object size greater than
+		 * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
+		 * this should not happen at all.
+		 * But leave a BUG_ON for some lucky dude.
+		 */
+		BUG_ON(!cachep->slabp_cache);
+	}
 	cachep->ctor = ctor;
 	cachep->dtor = dtor;
 	cachep->name = name;
 
-
-	setup_cpu_cache(cachep);
+	if (setup_cpu_cache(cachep)) {
+		__kmem_cache_destroy(cachep);
+		cachep = NULL;
+		goto oops;
+	}
 
 	/* cache setup completed, link it into the list */
 	list_add(&cachep->next, &cache_chain);
@@ -2255,32 +2400,45 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 	}
 }
 
-static int __node_shrink(struct kmem_cache *cachep, int node)
+/*
+ * Remove slabs from the list of free slabs.
+ * Specify the number of slabs to drain in tofree.
+ *
+ * Returns the actual number of slabs released.
+ */
+static int drain_freelist(struct kmem_cache *cache,
+			struct kmem_list3 *l3, int tofree)
 {
+	struct list_head *p;
+	int nr_freed;
 	struct slab *slabp;
-	struct kmem_list3 *l3 = cachep->nodelists[node];
-	int ret;
 
-	for (;;) {
-		struct list_head *p;
-
+	nr_freed = 0;
+	while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
+
+		spin_lock_irq(&l3->list_lock);
 		p = l3->slabs_free.prev;
-		if (p == &l3->slabs_free)
-			break;
+		if (p == &l3->slabs_free) {
+			spin_unlock_irq(&l3->list_lock);
+			goto out;
+		}
 
-		slabp = list_entry(l3->slabs_free.prev, struct slab, list);
+		slabp = list_entry(p, struct slab, list);
 #if DEBUG
 		BUG_ON(slabp->inuse);
 #endif
 		list_del(&slabp->list);
-
-		l3->free_objects -= cachep->num;
+		/*
+		 * Safe to drop the lock. The slab is no longer linked
+		 * to the cache.
+		 */
+		l3->free_objects -= cache->num;
 		spin_unlock_irq(&l3->list_lock);
-		slab_destroy(cachep, slabp);
-		spin_lock_irq(&l3->list_lock);
+		slab_destroy(cache, slabp);
+		nr_freed++;
 	}
-	ret = !list_empty(&l3->slabs_full) || !list_empty(&l3->slabs_partial);
-	return ret;
+out:
+	return nr_freed;
 }
 
 static int __cache_shrink(struct kmem_cache *cachep)
@@ -2293,11 +2451,13 @@ static int __cache_shrink(struct kmem_cache *cachep)
 	check_irq_on();
 	for_each_online_node(i) {
 		l3 = cachep->nodelists[i];
-		if (l3) {
-			spin_lock_irq(&l3->list_lock);
-			ret += __node_shrink(cachep, i);
-			spin_unlock_irq(&l3->list_lock);
-		}
+		if (!l3)
+			continue;
+
+		drain_freelist(cachep, l3, l3->free_objects);
+
+		ret += !list_empty(&l3->slabs_full) ||
+			!list_empty(&l3->slabs_partial);
 	}
 	return (ret ? 1 : 0);
 }
@@ -2322,7 +2482,6 @@ EXPORT_SYMBOL(kmem_cache_shrink);
  * @cachep: the cache to destroy
  *
  * Remove a struct kmem_cache object from the slab cache.
- * Returns 0 on success.
  *
  * It is expected this function will be called by a module when it is
  * unloaded. This will remove the cache completely, and avoid a duplicate
@@ -2334,11 +2493,8 @@ EXPORT_SYMBOL(kmem_cache_shrink);
  * The caller must guarantee that noone will allocate memory from the cache
  * during the kmem_cache_destroy().
  */
-int kmem_cache_destroy(struct kmem_cache *cachep)
+void kmem_cache_destroy(struct kmem_cache *cachep)
 {
-	int i;
-	struct kmem_list3 *l3;
-
 	BUG_ON(!cachep || in_interrupt());
 
 	/* Don't let CPUs to come and go */
@@ -2358,31 +2514,28 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
 		list_add(&cachep->next, &cache_chain);
 		mutex_unlock(&cache_chain_mutex);
 		unlock_cpu_hotplug();
-		return 1;
+		return;
 	}
 
 	if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
 		synchronize_rcu();
 
-	for_each_online_cpu(i)
-	    kfree(cachep->array[i]);
-
-	/* NUMA: free the list3 structures */
-	for_each_online_node(i) {
-		l3 = cachep->nodelists[i];
-		if (l3) {
-			kfree(l3->shared);
-			free_alien_cache(l3->alien);
-			kfree(l3);
-		}
-	}
-	kmem_cache_free(&cache_cache, cachep);
+	__kmem_cache_destroy(cachep);
 	unlock_cpu_hotplug();
-	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
-/* Get the memory for a slab management obj. */
+/*
+ * Get the memory for a slab management obj.
+ * For a slab cache when the slab descriptor is off-slab, slab descriptors
+ * always come from malloc_sizes caches. The slab descriptor cannot
+ * come from the same cache which is getting created because,
+ * when we are searching for an appropriate cache for these
+ * descriptors in kmem_cache_create, we search through the malloc_sizes array.
+ * If we are creating a malloc_sizes cache here it would not be visible to
+ * kmem_find_general_cachep till the initialization is complete.
+ * Hence we cannot have slabp_cache same as the original cache.
+ */
 static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 				   int colour_off, gfp_t local_flags,
 				   int nodeid)
@@ -2915,14 +3068,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 	void *objp;
 	struct array_cache *ac;
 
-#ifdef CONFIG_NUMA
-	if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
-		objp = alternate_node_alloc(cachep, flags);
-		if (objp != NULL)
-			return objp;
-	}
-#endif
-
 	check_irq_off();
 	ac = cpu_cache_get(cachep);
 	if (likely(ac->avail)) {
@@ -2940,12 +3085,24 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
 						gfp_t flags, void *caller)
 {
 	unsigned long save_flags;
-	void *objp;
+	void *objp = NULL;
 
 	cache_alloc_debugcheck_before(cachep, flags);
 
 	local_irq_save(save_flags);
-	objp = ____cache_alloc(cachep, flags);
+
+	if (unlikely(NUMA_BUILD &&
+			current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
+		objp = alternate_node_alloc(cachep, flags);
+
+	if (!objp)
+		objp = ____cache_alloc(cachep, flags);
+	/*
+	 * We may just have run out of memory on the local node.
+	 * __cache_alloc_node() knows how to locate memory on other nodes
+	 */
+	if (NUMA_BUILD && !objp)
+		objp = __cache_alloc_node(cachep, flags, numa_node_id());
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp,
 					    caller);
@@ -2964,7 +3121,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
 	int nid_alloc, nid_here;
 
-	if (in_interrupt())
+	if (in_interrupt() || (flags & __GFP_THISNODE))
 		return NULL;
 	nid_alloc = nid_here = numa_node_id();
 	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
@@ -2977,6 +3134,28 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
 }
 
 /*
+ * Fallback function if there was no memory available and no objects on a
+ * certain node and we are allowed to fall back. We mimick the behavior of
+ * the page allocator. We fall back according to a zonelist determined by
+ * the policy layer while obeying cpuset constraints.
+ */
+void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
+{
+	struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy))
+					->node_zonelists[gfp_zone(flags)];
+	struct zone **z;
+	void *obj = NULL;
+
+	for (z = zonelist->zones; *z && !obj; z++)
+		if (zone_idx(*z) <= ZONE_NORMAL &&
+				cpuset_zone_allowed(*z, flags))
+			obj = __cache_alloc_node(cache,
+					flags | __GFP_THISNODE,
+					zone_to_nid(*z));
+	return obj;
+}
+
+/*
  * A interface to enable slab creation on nodeid
  */
 static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
@@ -3029,11 +3208,15 @@ retry:
 must_grow:
 	spin_unlock(&l3->list_lock);
 	x = cache_grow(cachep, flags, nodeid);
+	if (x)
+		goto retry;
 
-	if (!x)
-		return NULL;
+	if (!(flags & __GFP_THISNODE))
+		/* Unable to grow the cache. Fall back to other nodes. */
+		return fallback_alloc(cachep, flags);
+
+	return NULL;
 
-	goto retry;
 done:
 	return obj;
 }
@@ -3066,6 +3249,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 		if (slabp->inuse == 0) {
 			if (l3->free_objects > l3->free_limit) {
 				l3->free_objects -= cachep->num;
+				/* No need to drop any previously held
+				 * lock here, even if we have a off-slab slab
+				 * descriptor it is guaranteed to come from
+				 * a different cache, refer to comments before
+				 * alloc_slabmgmt.
+				 */
 				slab_destroy(cachep, slabp);
 			} else {
 				list_add(&slabp->list, &l3->slabs_free);
@@ -3171,7 +3360,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 EXPORT_SYMBOL(kmem_cache_alloc);
 
 /**
- * kmem_cache_alloc - Allocate an object. The memory is set to zero.
+ * kmem_cache_zalloc - Allocate an object. The memory is set to zero.
  * @cache: The cache to allocate from.
  * @flags: See kmalloc().
  *
@@ -3264,7 +3453,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
-void *kmalloc_node(size_t size, gfp_t flags, int node)
+void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *cachep;
 
@@ -3273,7 +3462,7 @@ void *kmalloc_node(size_t size, gfp_t flags, int node)
 		return NULL;
 	return kmem_cache_alloc_node(cachep, flags, node);
 }
-EXPORT_SYMBOL(kmalloc_node);
+EXPORT_SYMBOL(__kmalloc_node);
 #endif
 
 /**
@@ -3317,55 +3506,6 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
 EXPORT_SYMBOL(__kmalloc_track_caller);
 #endif
 
-#ifdef CONFIG_SMP
-/**
- * __alloc_percpu - allocate one copy of the object for every present
- * cpu in the system, zeroing them.
- * Objects should be dereferenced using the per_cpu_ptr macro only.
- *
- * @size: how many bytes of memory are required.
- */
-void *__alloc_percpu(size_t size)
-{
-	int i;
-	struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
-
-	if (!pdata)
-		return NULL;
-
-	/*
-	 * Cannot use for_each_online_cpu since a cpu may come online
-	 * and we have no way of figuring out how to fix the array
-	 * that we have allocated then....
-	 */
-	for_each_possible_cpu(i) {
-		int node = cpu_to_node(i);
-
-		if (node_online(node))
-			pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node);
-		else
-			pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
-
-		if (!pdata->ptrs[i])
-			goto unwind_oom;
-		memset(pdata->ptrs[i], 0, size);
-	}
-
-	/* Catch derefs w/o wrappers */
-	return (void *)(~(unsigned long)pdata);
-
-unwind_oom:
-	while (--i >= 0) {
-		if (!cpu_possible(i))
-			continue;
-		kfree(pdata->ptrs[i]);
-	}
-	kfree(pdata);
-	return NULL;
-}
-EXPORT_SYMBOL(__alloc_percpu);
-#endif
-
 /**
  * kmem_cache_free - Deallocate an object
  * @cachep: The cache the allocation was from.
@@ -3405,35 +3545,12 @@ void kfree(const void *objp)
 	local_irq_save(flags);
 	kfree_debugcheck(objp);
 	c = virt_to_cache(objp);
-	mutex_debug_check_no_locks_freed(objp, obj_size(c));
+	debug_check_no_locks_freed(objp, obj_size(c));
 	__cache_free(c, (void *)objp);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(kfree);
 
-#ifdef CONFIG_SMP
-/**
- * free_percpu - free previously allocated percpu memory
- * @objp: pointer returned by alloc_percpu.
- *
- * Don't free memory not originally allocated by alloc_percpu()
- * The complemented objp is to check for that.
- */
-void free_percpu(const void *objp)
-{
-	int i;
-	struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp);
-
-	/*
-	 * We allocate for all cpus so we cannot use for online cpu here.
-	 */
-	for_each_possible_cpu(i)
-		kfree(p->ptrs[i]);
-	kfree(p);
-}
-EXPORT_SYMBOL(free_percpu);
-#endif
-
 unsigned int kmem_cache_size(struct kmem_cache *cachep)
 {
 	return obj_size(cachep);
@@ -3550,22 +3667,26 @@ static void do_ccupdate_local(void *info)
 static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 				int batchcount, int shared)
 {
-	struct ccupdate_struct new;
-	int i, err;
+	struct ccupdate_struct *new;
+	int i;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
 
-	memset(&new.new, 0, sizeof(new.new));
 	for_each_online_cpu(i) {
-		new.new[i] = alloc_arraycache(cpu_to_node(i), limit,
+		new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
 						batchcount);
-		if (!new.new[i]) {
+		if (!new->new[i]) {
 			for (i--; i >= 0; i--)
-				kfree(new.new[i]);
+				kfree(new->new[i]);
+			kfree(new);
 			return -ENOMEM;
 		}
 	}
-	new.cachep = cachep;
+	new->cachep = cachep;
 
-	on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1);
+	on_each_cpu(do_ccupdate_local, (void *)new, 1, 1);
 
 	check_irq_on();
 	cachep->batchcount = batchcount;
@@ -3573,7 +3694,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 	cachep->shared = shared;
 
 	for_each_online_cpu(i) {
-		struct array_cache *ccold = new.new[i];
+		struct array_cache *ccold = new->new[i];
 		if (!ccold)
 			continue;
 		spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
@@ -3581,18 +3702,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 		spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
 		kfree(ccold);
 	}
-
-	err = alloc_kmemlist(cachep);
-	if (err) {
-		printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n",
-		       cachep->name, -err);
-		BUG();
-	}
-	return 0;
+	kfree(new);
+	return alloc_kmemlist(cachep);
 }
 
 /* Called with cache_chain_mutex held always */
-static void enable_cpucache(struct kmem_cache *cachep)
+static int enable_cpucache(struct kmem_cache *cachep)
 {
 	int err;
 	int limit, shared;
@@ -3644,6 +3759,7 @@ static void enable_cpucache(struct kmem_cache *cachep)
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
 		       cachep->name, -err);
+	return err;
 }
 
 /*
@@ -3701,10 +3817,6 @@ static void cache_reap(void *unused)
 	}
 
 	list_for_each_entry(searchp, &cache_chain, next) {
-		struct list_head *p;
-		int tofree;
-		struct slab *slabp;
-
 		check_irq_on();
 
 		/*
@@ -3729,47 +3841,22 @@ static void cache_reap(void *unused)
 
 		drain_array(searchp, l3, l3->shared, 0, node);
 
-		if (l3->free_touched) {
+		if (l3->free_touched)
 			l3->free_touched = 0;
-			goto next;
-		}
+		else {
+			int freed;
 
-		tofree = (l3->free_limit + 5 * searchp->num - 1) /
-			(5 * searchp->num);
-		do {
-			/*
-			 * Do not lock if there are no free blocks.
-			 */
-			if (list_empty(&l3->slabs_free))
-				break;
-
-			spin_lock_irq(&l3->list_lock);
-			p = l3->slabs_free.next;
-			if (p == &(l3->slabs_free)) {
-				spin_unlock_irq(&l3->list_lock);
-				break;
-			}
-
-			slabp = list_entry(p, struct slab, list);
-			BUG_ON(slabp->inuse);
-			list_del(&slabp->list);
-			STATS_INC_REAPED(searchp);
-
-			/*
-			 * Safe to drop the lock. The slab is no longer linked
-			 * to the cache. searchp cannot disappear, we hold
-			 * cache_chain_lock
-			 */
-			l3->free_objects -= searchp->num;
-			spin_unlock_irq(&l3->list_lock);
-			slab_destroy(searchp, slabp);
-		} while (--tofree > 0);
+			freed = drain_freelist(searchp, l3, (l3->free_limit +
+				5 * searchp->num - 1) / (5 * searchp->num));
+			STATS_ADD_REAPED(searchp, freed);
+		}
 next:
 		cond_resched();
 	}
 	check_irq_on();
 	mutex_unlock(&cache_chain_mutex);
 	next_reap_node();
+	refresh_cpu_vm_stats(smp_processor_id());
 	/* Set up the next iteration */
 	schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
 }
@@ -4133,6 +4220,7 @@ static int leaks_show(struct seq_file *m, void *p)
 		show_symbol(m, n[2*i+2]);
 		seq_putc(m, '\n');
 	}
+
 	return 0;
 }
 