Diffstat (limited to 'mm/slab.c')
-rw-r--r--  mm/slab.c  434
1 file changed, 238 insertions, 196 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 21ba06035700..792bfe320a8b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -313,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache,
 				struct kmem_list3 *l3, int tofree);
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
 			int node);
-static void enable_cpucache(struct kmem_cache *cachep);
+static int enable_cpucache(struct kmem_cache *cachep);
 static void cache_reap(void *unused);
 
 /*
@@ -674,6 +674,8 @@ static struct kmem_cache cache_cache = {
 #endif
 };
 
+#define BAD_ALIEN_MAGIC 0x01020304ul
+
 #ifdef CONFIG_LOCKDEP
 
 /*
@@ -682,42 +684,58 @@ static struct kmem_cache cache_cache = {
  * The locking for this is tricky in that it nests within the locks
  * of all other slabs in a few places; to deal with this special
  * locking we put on-slab caches into a separate lock-class.
+ *
+ * We set lock class for alien array caches which are up during init.
+ * The lock annotation will be lost if all cpus of a node goes down and
+ * then comes back up during hotplug
  */
-static struct lock_class_key on_slab_key;
+static struct lock_class_key on_slab_l3_key;
+static struct lock_class_key on_slab_alc_key;
+
+static inline void init_lock_keys(void)
 
-static inline void init_lock_keys(struct cache_sizes *s)
 {
 	int q;
-
-	for (q = 0; q < MAX_NUMNODES; q++) {
-		if (!s->cs_cachep->nodelists[q] || OFF_SLAB(s->cs_cachep))
-			continue;
-		lockdep_set_class(&s->cs_cachep->nodelists[q]->list_lock,
-				  &on_slab_key);
+	struct cache_sizes *s = malloc_sizes;
+
+	while (s->cs_size != ULONG_MAX) {
+		for_each_node(q) {
+			struct array_cache **alc;
+			int r;
+			struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
+			if (!l3 || OFF_SLAB(s->cs_cachep))
+				continue;
+			lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
+			alc = l3->alien;
+			/*
+			 * FIXME: This check for BAD_ALIEN_MAGIC
+			 * should go away when common slab code is taught to
+			 * work even without alien caches.
+			 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
+			 * for alloc_alien_cache,
+			 */
+			if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+				continue;
+			for_each_node(r) {
+				if (alc[r])
+					lockdep_set_class(&alc[r]->lock,
+					     &on_slab_alc_key);
+			}
+		}
+		s++;
 	}
 }
-
 #else
-static inline void init_lock_keys(struct cache_sizes *s)
+static inline void init_lock_keys(void)
 {
 }
 #endif
 
-
-
 /* Guard access to the cache-chain. */
 static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
 
 /*
- * vm_enough_memory() looks at this to determine how many slab-allocated pages
- * are possibly freeable under pressure
- *
- * SLAB_RECLAIM_ACCOUNT turns this on per-slab
- */
-atomic_t slab_reclaim_pages;
-
-/*
  * chicken and egg problem: delay the per-cpu array allocation
  * until the general caches are up.
  */
@@ -768,11 +786,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size,
 	return csizep->cs_cachep;
 }
 
-struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
+static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
 {
 	return __find_general_cachep(size, gfpflags);
 }
-EXPORT_SYMBOL(kmem_find_general_cachep);
 
 static size_t slab_mgmt_size(size_t nr_objs, size_t align)
 {
@@ -955,7 +972,39 @@ static int transfer_objects(struct array_cache *to,
 	return nr;
 }
 
-#ifdef CONFIG_NUMA
+#ifndef CONFIG_NUMA
+
+#define drain_alien_cache(cachep, alien) do { } while (0)
+#define reap_alien(cachep, l3) do { } while (0)
+
+static inline struct array_cache **alloc_alien_cache(int node, int limit)
+{
+	return (struct array_cache **)BAD_ALIEN_MAGIC;
+}
+
+static inline void free_alien_cache(struct array_cache **ac_ptr)
+{
+}
+
+static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
+{
+	return 0;
+}
+
+static inline void *alternate_node_alloc(struct kmem_cache *cachep,
+		gfp_t flags)
+{
+	return NULL;
+}
+
+static inline void *__cache_alloc_node(struct kmem_cache *cachep,
+		 gfp_t flags, int nodeid)
+{
+	return NULL;
+}
+
+#else	/* CONFIG_NUMA */
+
 static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
@@ -1084,26 +1133,6 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 	}
 	return 1;
 }
-
-#else
-
-#define drain_alien_cache(cachep, alien) do { } while (0)
-#define reap_alien(cachep, l3) do { } while (0)
-
-static inline struct array_cache **alloc_alien_cache(int node, int limit)
-{
-	return (struct array_cache **) 0x01020304ul;
-}
-
-static inline void free_alien_cache(struct array_cache **ac_ptr)
-{
-}
-
-static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
-{
-	return 0;
-}
-
 #endif
 
 static int __cpuinit cpuup_callback(struct notifier_block *nfb,
@@ -1422,7 +1451,6 @@ void __init kmem_cache_init(void)
 					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
 					NULL, NULL);
 	}
-	init_lock_keys(sizes);
 
 		sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
 					sizes->cs_size,
@@ -1491,10 +1519,15 @@ void __init kmem_cache_init(void)
 		struct kmem_cache *cachep;
 		mutex_lock(&cache_chain_mutex);
 		list_for_each_entry(cachep, &cache_chain, next)
-			enable_cpucache(cachep);
+			if (enable_cpucache(cachep))
+				BUG();
 		mutex_unlock(&cache_chain_mutex);
 	}
 
+	/* Annotate slab for lockdep -- annotate the malloc caches */
+	init_lock_keys();
+
+
 	/* Done! */
 	g_cpucache_up = FULL;
 
@@ -1543,7 +1576,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	 */
 	flags |= __GFP_COMP;
 #endif
-	flags |= cachep->gfpflags;
+
+	/*
+	 * Under NUMA we want memory on the indicated node. We will handle
+	 * the needed fallback ourselves since we want to serve from our
+	 * per node object lists first for other nodes.
+	 */
+	flags |= cachep->gfpflags | GFP_THISNODE;
 
 	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
 	if (!page)
@@ -1551,8 +1590,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-		atomic_add(nr_pages, &slab_reclaim_pages);
-	add_zone_page_state(page_zone(page), NR_SLAB, nr_pages);
+		add_zone_page_state(page_zone(page),
+			NR_SLAB_RECLAIMABLE, nr_pages);
+	else
+		add_zone_page_state(page_zone(page),
+			NR_SLAB_UNRECLAIMABLE, nr_pages);
 	for (i = 0; i < nr_pages; i++)
 		__SetPageSlab(page + i);
 	return page_address(page);
@@ -1567,7 +1609,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	struct page *page = virt_to_page(addr);
 	const unsigned long nr_freed = i;
 
-	sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed);
+	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+		sub_zone_page_state(page_zone(page),
+				NR_SLAB_RECLAIMABLE, nr_freed);
+	else
+		sub_zone_page_state(page_zone(page),
+				NR_SLAB_UNRECLAIMABLE, nr_freed);
 	while (i--) {
 		BUG_ON(!PageSlab(page));
 		__ClearPageSlab(page);
@@ -1576,8 +1623,6 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
 	free_pages((unsigned long)addr, cachep->gfporder);
-	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-		atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
@@ -1834,6 +1879,27 @@ static void set_up_list3s(struct kmem_cache *cachep, int index)
 	}
 }
 
+static void __kmem_cache_destroy(struct kmem_cache *cachep)
+{
+	int i;
+	struct kmem_list3 *l3;
+
+	for_each_online_cpu(i)
+		kfree(cachep->array[i]);
+
+	/* NUMA: free the list3 structures */
+	for_each_online_node(i) {
+		l3 = cachep->nodelists[i];
+		if (l3) {
+			kfree(l3->shared);
+			free_alien_cache(l3->alien);
+			kfree(l3);
+		}
+	}
+	kmem_cache_free(&cache_cache, cachep);
+}
+
+
 /**
  * calculate_slab_order - calculate size (page order) of slabs
  * @cachep: pointer to the cache that is being created
@@ -1904,12 +1970,11 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 	return left_over;
 }
 
-static void setup_cpu_cache(struct kmem_cache *cachep)
+static int setup_cpu_cache(struct kmem_cache *cachep)
 {
-	if (g_cpucache_up == FULL) {
-		enable_cpucache(cachep);
-		return;
-	}
+	if (g_cpucache_up == FULL)
+		return enable_cpucache(cachep);
+
 	if (g_cpucache_up == NONE) {
 		/*
 		 * Note: the first kmem_cache_create must create the cache
@@ -1956,6 +2021,7 @@ static void setup_cpu_cache(struct kmem_cache *cachep)
 	cpu_cache_get(cachep)->touched = 0;
 	cachep->batchcount = 1;
 	cachep->limit = BOOT_CPUCACHE_ENTRIES;
+	return 0;
 }
 
 /**
@@ -2097,6 +2163,15 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	} else {
 		ralign = BYTES_PER_WORD;
 	}
+
+	/*
+	 * Redzoning and user store require word alignment. Note this will be
+	 * overridden by architecture or caller mandated alignment if either
+	 * is greater than BYTES_PER_WORD.
+	 */
+	if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
+		ralign = BYTES_PER_WORD;
+
 	/* 2) arch mandated alignment: disables debug if necessary */
 	if (ralign < ARCH_SLAB_MINALIGN) {
 		ralign = ARCH_SLAB_MINALIGN;
@@ -2110,8 +2185,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
 	}
 	/*
-	 * 4) Store it. Note that the debug code below can reduce
-	 * the alignment to BYTES_PER_WORD.
+	 * 4) Store it.
 	 */
 	align = ralign;
 
@@ -2123,20 +2197,19 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 #if DEBUG
 	cachep->obj_size = size;
 
+	/*
+	 * Both debugging options require word-alignment which is calculated
+	 * into align above.
+	 */
 	if (flags & SLAB_RED_ZONE) {
-		/* redzoning only works with word aligned caches */
-		align = BYTES_PER_WORD;
-
 		/* add space for red zone words */
 		cachep->obj_offset += BYTES_PER_WORD;
 		size += 2 * BYTES_PER_WORD;
 	}
 	if (flags & SLAB_STORE_USER) {
-		/* user store requires word alignment and
-		 * one word storage behind the end of the real
-		 * object.
+		/* user store requires one word storage behind the end of
+		 * the real object.
 		 */
-		align = BYTES_PER_WORD;
 		size += BYTES_PER_WORD;
 	}
 #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
@@ -2200,14 +2273,26 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		cachep->gfpflags |= GFP_DMA;
 	cachep->buffer_size = size;
 
-	if (flags & CFLGS_OFF_SLAB)
+	if (flags & CFLGS_OFF_SLAB) {
 		cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
+		/*
+		 * This is a possibility for one of the malloc_sizes caches.
+		 * But since we go off slab only for object size greater than
+		 * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
+		 * this should not happen at all.
+		 * But leave a BUG_ON for some lucky dude.
+		 */
+		BUG_ON(!cachep->slabp_cache);
+	}
 	cachep->ctor = ctor;
 	cachep->dtor = dtor;
 	cachep->name = name;
 
-
-	setup_cpu_cache(cachep);
+	if (setup_cpu_cache(cachep)) {
+		__kmem_cache_destroy(cachep);
+		cachep = NULL;
+		goto oops;
+	}
 
 	/* cache setup completed, link it into the list */
 	list_add(&cachep->next, &cache_chain);
@@ -2375,7 +2460,6 @@ EXPORT_SYMBOL(kmem_cache_shrink);
  * @cachep: the cache to destroy
  *
  * Remove a struct kmem_cache object from the slab cache.
- * Returns 0 on success.
  *
  * It is expected this function will be called by a module when it is
  * unloaded. This will remove the cache completely, and avoid a duplicate
@@ -2387,11 +2471,8 @@ EXPORT_SYMBOL(kmem_cache_shrink);
  * The caller must guarantee that noone will allocate memory from the cache
  * during the kmem_cache_destroy().
  */
-int kmem_cache_destroy(struct kmem_cache *cachep)
+void kmem_cache_destroy(struct kmem_cache *cachep)
 {
-	int i;
-	struct kmem_list3 *l3;
-
 	BUG_ON(!cachep || in_interrupt());
 
 	/* Don't let CPUs to come and go */
@@ -2411,31 +2492,28 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
 		list_add(&cachep->next, &cache_chain);
 		mutex_unlock(&cache_chain_mutex);
 		unlock_cpu_hotplug();
-		return 1;
+		return;
 	}
 
 	if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
 		synchronize_rcu();
 
-	for_each_online_cpu(i)
-		kfree(cachep->array[i]);
-
-	/* NUMA: free the list3 structures */
-	for_each_online_node(i) {
-		l3 = cachep->nodelists[i];
-		if (l3) {
-			kfree(l3->shared);
-			free_alien_cache(l3->alien);
-			kfree(l3);
-		}
-	}
-	kmem_cache_free(&cache_cache, cachep);
+	__kmem_cache_destroy(cachep);
 	unlock_cpu_hotplug();
-	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
-/* Get the memory for a slab management obj. */
+/*
+ * Get the memory for a slab management obj.
+ * For a slab cache when the slab descriptor is off-slab, slab descriptors
+ * always come from malloc_sizes caches. The slab descriptor cannot
+ * come from the same cache which is getting created because,
+ * when we are searching for an appropriate cache for these
+ * descriptors in kmem_cache_create, we search through the malloc_sizes array.
+ * If we are creating a malloc_sizes cache here it would not be visible to
+ * kmem_find_general_cachep till the initialization is complete.
+ * Hence we cannot have slabp_cache same as the original cache.
+ */
 static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 				   int colour_off, gfp_t local_flags,
 				   int nodeid)
@@ -2968,14 +3046,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 	void *objp;
 	struct array_cache *ac;
 
-#ifdef CONFIG_NUMA
-	if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
-		objp = alternate_node_alloc(cachep, flags);
-		if (objp != NULL)
-			return objp;
-	}
-#endif
-
 	check_irq_off();
 	ac = cpu_cache_get(cachep);
 	if (likely(ac->avail)) {
@@ -2993,12 +3063,24 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
 						gfp_t flags, void *caller)
 {
 	unsigned long save_flags;
-	void *objp;
+	void *objp = NULL;
 
 	cache_alloc_debugcheck_before(cachep, flags);
 
 	local_irq_save(save_flags);
-	objp = ____cache_alloc(cachep, flags);
+
+	if (unlikely(NUMA_BUILD &&
+			current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
+		objp = alternate_node_alloc(cachep, flags);
+
+	if (!objp)
+		objp = ____cache_alloc(cachep, flags);
+	/*
+	 * We may just have run out of memory on the local node.
+	 * __cache_alloc_node() knows how to locate memory on other nodes
+	 */
+	if (NUMA_BUILD && !objp)
+		objp = __cache_alloc_node(cachep, flags, numa_node_id());
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp,
 					    caller);
@@ -3017,7 +3099,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
 	int nid_alloc, nid_here;
 
-	if (in_interrupt())
+	if (in_interrupt() || (flags & __GFP_THISNODE))
 		return NULL;
 	nid_alloc = nid_here = numa_node_id();
 	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
@@ -3030,6 +3112,28 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
 }
 
 /*
+ * Fallback function if there was no memory available and no objects on a
+ * certain node and we are allowed to fall back. We mimick the behavior of
+ * the page allocator. We fall back according to a zonelist determined by
+ * the policy layer while obeying cpuset constraints.
+ */
+void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
+{
+	struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy))
+			->node_zonelists[gfp_zone(flags)];
+	struct zone **z;
+	void *obj = NULL;
+
+	for (z = zonelist->zones; *z && !obj; z++)
+		if (zone_idx(*z) <= ZONE_NORMAL &&
+				cpuset_zone_allowed(*z, flags))
+			obj = __cache_alloc_node(cache,
+					flags | __GFP_THISNODE,
+					zone_to_nid(*z));
+	return obj;
+}
+
+/*
  * A interface to enable slab creation on nodeid
  */
 static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
@@ -3082,11 +3186,15 @@ retry:
 must_grow:
 	spin_unlock(&l3->list_lock);
 	x = cache_grow(cachep, flags, nodeid);
+	if (x)
+		goto retry;
 
-	if (!x)
-		return NULL;
+	if (!(flags & __GFP_THISNODE))
+		/* Unable to grow the cache. Fall back to other nodes. */
+		return fallback_alloc(cachep, flags);
+
+	return NULL;
 
-	goto retry;
 done:
 	return obj;
 }
@@ -3119,6 +3227,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 		if (slabp->inuse == 0) {
 			if (l3->free_objects > l3->free_limit) {
 				l3->free_objects -= cachep->num;
+				/* No need to drop any previously held
+				 * lock here, even if we have a off-slab slab
+				 * descriptor it is guaranteed to come from
+				 * a different cache, refer to comments before
+				 * alloc_slabmgmt.
+				 */
 				slab_destroy(cachep, slabp);
 			} else {
 				list_add(&slabp->list, &l3->slabs_free);
@@ -3317,7 +3431,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
-void *kmalloc_node(size_t size, gfp_t flags, int node)
+void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *cachep;
 
@@ -3326,7 +3440,7 @@ void *kmalloc_node(size_t size, gfp_t flags, int node)
 		return NULL;
 	return kmem_cache_alloc_node(cachep, flags, node);
 }
-EXPORT_SYMBOL(kmalloc_node);
+EXPORT_SYMBOL(__kmalloc_node);
 #endif
 
 /**
@@ -3370,55 +3484,6 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
 EXPORT_SYMBOL(__kmalloc_track_caller);
 #endif
 
-#ifdef CONFIG_SMP
-/**
- * __alloc_percpu - allocate one copy of the object for every present
- * cpu in the system, zeroing them.
- * Objects should be dereferenced using the per_cpu_ptr macro only.
- *
- * @size: how many bytes of memory are required.
- */
-void *__alloc_percpu(size_t size)
-{
-	int i;
-	struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
-
-	if (!pdata)
-		return NULL;
-
-	/*
-	 * Cannot use for_each_online_cpu since a cpu may come online
-	 * and we have no way of figuring out how to fix the array
-	 * that we have allocated then....
-	 */
-	for_each_possible_cpu(i) {
-		int node = cpu_to_node(i);
-
-		if (node_online(node))
-			pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node);
-		else
-			pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
-
-		if (!pdata->ptrs[i])
-			goto unwind_oom;
-		memset(pdata->ptrs[i], 0, size);
-	}
-
-	/* Catch derefs w/o wrappers */
-	return (void *)(~(unsigned long)pdata);
-
-unwind_oom:
-	while (--i >= 0) {
-		if (!cpu_possible(i))
-			continue;
-		kfree(pdata->ptrs[i]);
-	}
-	kfree(pdata);
-	return NULL;
-}
-EXPORT_SYMBOL(__alloc_percpu);
-#endif
-
 /**
  * kmem_cache_free - Deallocate an object
  * @cachep: The cache the allocation was from.
@@ -3464,29 +3529,6 @@ void kfree(const void *objp)
 }
 EXPORT_SYMBOL(kfree);
 
-#ifdef CONFIG_SMP
-/**
- * free_percpu - free previously allocated percpu memory
- * @objp: pointer returned by alloc_percpu.
- *
- * Don't free memory not originally allocated by alloc_percpu()
- * The complemented objp is to check for that.
- */
-void free_percpu(const void *objp)
-{
-	int i;
-	struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp);
-
-	/*
-	 * We allocate for all cpus so we cannot use for online cpu here.
-	 */
-	for_each_possible_cpu(i)
-		kfree(p->ptrs[i]);
-	kfree(p);
-}
-EXPORT_SYMBOL(free_percpu);
-#endif
-
 unsigned int kmem_cache_size(struct kmem_cache *cachep)
 {
 	return obj_size(cachep);
@@ -3603,22 +3645,26 @@ static void do_ccupdate_local(void *info)
 static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 				int batchcount, int shared)
 {
-	struct ccupdate_struct new;
-	int i, err;
+	struct ccupdate_struct *new;
+	int i;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
 
-	memset(&new.new, 0, sizeof(new.new));
 	for_each_online_cpu(i) {
-		new.new[i] = alloc_arraycache(cpu_to_node(i), limit,
+		new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
 						batchcount);
-		if (!new.new[i]) {
+		if (!new->new[i]) {
 			for (i--; i >= 0; i--)
-				kfree(new.new[i]);
+				kfree(new->new[i]);
+			kfree(new);
 			return -ENOMEM;
 		}
 	}
-	new.cachep = cachep;
+	new->cachep = cachep;
 
-	on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1);
+	on_each_cpu(do_ccupdate_local, (void *)new, 1, 1);
 
 	check_irq_on();
 	cachep->batchcount = batchcount;
@@ -3626,7 +3672,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 	cachep->shared = shared;
 
 	for_each_online_cpu(i) {
-		struct array_cache *ccold = new.new[i];
+		struct array_cache *ccold = new->new[i];
 		if (!ccold)
 			continue;
 		spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
@@ -3634,18 +3680,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 		spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
 		kfree(ccold);
 	}
-
-	err = alloc_kmemlist(cachep);
-	if (err) {
-		printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n",
-		       cachep->name, -err);
-		BUG();
-	}
-	return 0;
+	kfree(new);
+	return alloc_kmemlist(cachep);
 }
 
 /* Called with cache_chain_mutex held always */
-static void enable_cpucache(struct kmem_cache *cachep)
+static int enable_cpucache(struct kmem_cache *cachep)
 {
 	int err;
 	int limit, shared;
@@ -3697,6 +3737,7 @@ static void enable_cpucache(struct kmem_cache *cachep)
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
 		       cachep->name, -err);
+	return err;
 }
 
 /*
@@ -4157,6 +4198,7 @@ static int leaks_show(struct seq_file *m, void *p)
 		show_symbol(m, n[2*i+2]);
 		seq_putc(m, '\n');
 	}
+
 	return 0;
 }
 