Diffstat (limited to 'mm/slab.c')
-rw-r--r--  mm/slab.c | 298
1 file changed, 201 insertions(+), 97 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 3c4a7e34eddc..068cb4503c15 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -103,12 +103,12 @@
103#include <linux/module.h> 103#include <linux/module.h>
104#include <linux/rcupdate.h> 104#include <linux/rcupdate.h>
105#include <linux/string.h> 105#include <linux/string.h>
106#include <linux/uaccess.h>
106#include <linux/nodemask.h> 107#include <linux/nodemask.h>
107#include <linux/mempolicy.h> 108#include <linux/mempolicy.h>
108#include <linux/mutex.h> 109#include <linux/mutex.h>
109#include <linux/rtmutex.h> 110#include <linux/rtmutex.h>
110 111
111#include <asm/uaccess.h>
112#include <asm/cacheflush.h> 112#include <asm/cacheflush.h>
113#include <asm/tlbflush.h> 113#include <asm/tlbflush.h>
114#include <asm/page.h> 114#include <asm/page.h>
@@ -313,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache,
313static void free_block(struct kmem_cache *cachep, void **objpp, int len, 313static void free_block(struct kmem_cache *cachep, void **objpp, int len,
314 int node); 314 int node);
315static int enable_cpucache(struct kmem_cache *cachep); 315static int enable_cpucache(struct kmem_cache *cachep);
316static void cache_reap(void *unused); 316static void cache_reap(struct work_struct *unused);
317 317
318/* 318/*
319 * This function must be completely optimized away if a constant is passed to 319 * This function must be completely optimized away if a constant is passed to
@@ -730,7 +730,10 @@ static inline void init_lock_keys(void)
730} 730}
731#endif 731#endif
732 732
733/* Guard access to the cache-chain. */ 733/*
734 * 1. Guard access to the cache-chain.
735 * 2. Protect sanity of cpu_online_map against cpu hotplug events
736 */
734static DEFINE_MUTEX(cache_chain_mutex); 737static DEFINE_MUTEX(cache_chain_mutex);
735static struct list_head cache_chain; 738static struct list_head cache_chain;
736 739
@@ -753,7 +756,7 @@ int slab_is_available(void)
753 return g_cpucache_up == FULL; 756 return g_cpucache_up == FULL;
754} 757}
755 758
756static DEFINE_PER_CPU(struct work_struct, reap_work); 759static DEFINE_PER_CPU(struct delayed_work, reap_work);
757 760
758static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) 761static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
759{ 762{
@@ -866,6 +869,22 @@ static void __slab_error(const char *function, struct kmem_cache *cachep,
866 dump_stack(); 869 dump_stack();
867} 870}
868 871
872/*
873 * By default on NUMA we use alien caches to stage the freeing of
874 * objects allocated from other nodes. This causes massive memory
875 * inefficiencies when using fake NUMA setup to split memory into a
876 * large number of small nodes, so it can be disabled on the command
877 * line
878 */
879
880static int use_alien_caches __read_mostly = 1;
881static int __init noaliencache_setup(char *s)
882{
883 use_alien_caches = 0;
884 return 1;
885}
886__setup("noaliencache", noaliencache_setup);
887
869#ifdef CONFIG_NUMA 888#ifdef CONFIG_NUMA
870/* 889/*
871 * Special reaping functions for NUMA systems called from cache_reap(). 890 * Special reaping functions for NUMA systems called from cache_reap().
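
The new flag is then checked at every point that would otherwise allocate or consult alien caches. Condensed from hunks further down in this patch, the gating pattern is simply (excerpt, not a complete function):

	/* Freeing: with alien caches disabled, every object is freed as if local. */
	if (likely(slabp->nodeid == node) || unlikely(!use_alien_caches))
		return 0;

	/* Setup paths (cpuup_callback, alloc_kmemlist): only build the alien
	 * array when the feature is enabled. */
	if (use_alien_caches) {
		alien = alloc_alien_cache(node, cachep->limit);
		if (!alien)
			goto bad;
	}

On the command line the default is flipped simply by passing noaliencache, typically on a fake-NUMA boot where memory is split into many small nodes.
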
@@ -916,16 +935,16 @@ static void next_reap_node(void)
916 */ 935 */
917static void __devinit start_cpu_timer(int cpu) 936static void __devinit start_cpu_timer(int cpu)
918{ 937{
919 struct work_struct *reap_work = &per_cpu(reap_work, cpu); 938 struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
920 939
921 /* 940 /*
922 * When this gets called from do_initcalls via cpucache_init(), 941 * When this gets called from do_initcalls via cpucache_init(),
923 * init_workqueues() has already run, so keventd will be setup 942 * init_workqueues() has already run, so keventd will be setup
924 * at that time. 943 * at that time.
925 */ 944 */
926 if (keventd_up() && reap_work->func == NULL) { 945 if (keventd_up() && reap_work->work.func == NULL) {
927 init_reap_node(cpu); 946 init_reap_node(cpu);
928 INIT_WORK(reap_work, cache_reap, NULL); 947 INIT_DELAYED_WORK(reap_work, cache_reap);
929 schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); 948 schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
930 } 949 }
931} 950}
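
This follows the 2.6.20 workqueue rework: work functions now receive the work item itself, and delayed work has its own type. A minimal sketch of the converted pattern, using hypothetical names (my_reap, my_reap_work); dynamically initialised items use INIT_DELAYED_WORK() as in the hunk above:

	#include <linux/workqueue.h>

	static void my_reap(struct work_struct *unused);
	static DECLARE_DELAYED_WORK(my_reap_work, my_reap);

	/* work_func_t now takes the work item; the old void * argument is gone. */
	static void my_reap(struct work_struct *unused)
	{
		/* ... periodic housekeeping ... */
		schedule_delayed_work(&my_reap_work, HZ);	/* re-arm for the next run */
	}
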
@@ -996,7 +1015,7 @@ static inline void *alternate_node_alloc(struct kmem_cache *cachep,
996 return NULL; 1015 return NULL;
997} 1016}
998 1017
999static inline void *__cache_alloc_node(struct kmem_cache *cachep, 1018static inline void *____cache_alloc_node(struct kmem_cache *cachep,
1000 gfp_t flags, int nodeid) 1019 gfp_t flags, int nodeid)
1001{ 1020{
1002 return NULL; 1021 return NULL;
@@ -1004,7 +1023,7 @@ static inline void *__cache_alloc_node(struct kmem_cache *cachep,
1004 1023
1005#else /* CONFIG_NUMA */ 1024#else /* CONFIG_NUMA */
1006 1025
1007static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int); 1026static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
1008static void *alternate_node_alloc(struct kmem_cache *, gfp_t); 1027static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
1009 1028
1010static struct array_cache **alloc_alien_cache(int node, int limit) 1029static struct array_cache **alloc_alien_cache(int node, int limit)
@@ -1114,7 +1133,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1114 * Make sure we are not freeing a object from another node to the array 1133 * Make sure we are not freeing a object from another node to the array
1115 * cache on this cpu. 1134 * cache on this cpu.
1116 */ 1135 */
1117 if (likely(slabp->nodeid == node)) 1136 if (likely(slabp->nodeid == node) || unlikely(!use_alien_caches))
1118 return 0; 1137 return 0;
1119 1138
1120 l3 = cachep->nodelists[node]; 1139 l3 = cachep->nodelists[node];
@@ -1192,7 +1211,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1192 list_for_each_entry(cachep, &cache_chain, next) { 1211 list_for_each_entry(cachep, &cache_chain, next) {
1193 struct array_cache *nc; 1212 struct array_cache *nc;
1194 struct array_cache *shared; 1213 struct array_cache *shared;
1195 struct array_cache **alien; 1214 struct array_cache **alien = NULL;
1196 1215
1197 nc = alloc_arraycache(node, cachep->limit, 1216 nc = alloc_arraycache(node, cachep->limit,
1198 cachep->batchcount); 1217 cachep->batchcount);
@@ -1204,9 +1223,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1204 if (!shared) 1223 if (!shared)
1205 goto bad; 1224 goto bad;
1206 1225
1207 alien = alloc_alien_cache(node, cachep->limit); 1226 if (use_alien_caches) {
1208 if (!alien) 1227 alien = alloc_alien_cache(node, cachep->limit);
1209 goto bad; 1228 if (!alien)
1229 goto bad;
1230 }
1210 cachep->array[cpu] = nc; 1231 cachep->array[cpu] = nc;
1211 l3 = cachep->nodelists[node]; 1232 l3 = cachep->nodelists[node];
1212 BUG_ON(!l3); 1233 BUG_ON(!l3);
@@ -1230,12 +1251,18 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1230 kfree(shared); 1251 kfree(shared);
1231 free_alien_cache(alien); 1252 free_alien_cache(alien);
1232 } 1253 }
1233 mutex_unlock(&cache_chain_mutex);
1234 break; 1254 break;
1235 case CPU_ONLINE: 1255 case CPU_ONLINE:
1256 mutex_unlock(&cache_chain_mutex);
1236 start_cpu_timer(cpu); 1257 start_cpu_timer(cpu);
1237 break; 1258 break;
1238#ifdef CONFIG_HOTPLUG_CPU 1259#ifdef CONFIG_HOTPLUG_CPU
1260 case CPU_DOWN_PREPARE:
1261 mutex_lock(&cache_chain_mutex);
1262 break;
1263 case CPU_DOWN_FAILED:
1264 mutex_unlock(&cache_chain_mutex);
1265 break;
1239 case CPU_DEAD: 1266 case CPU_DEAD:
1240 /* 1267 /*
1241 * Even if all the cpus of a node are down, we don't free the 1268 * Even if all the cpus of a node are down, we don't free the
@@ -1246,8 +1273,8 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1246 * gets destroyed at kmem_cache_destroy(). 1273 * gets destroyed at kmem_cache_destroy().
1247 */ 1274 */
1248 /* fall thru */ 1275 /* fall thru */
1276#endif
1249 case CPU_UP_CANCELED: 1277 case CPU_UP_CANCELED:
1250 mutex_lock(&cache_chain_mutex);
1251 list_for_each_entry(cachep, &cache_chain, next) { 1278 list_for_each_entry(cachep, &cache_chain, next) {
1252 struct array_cache *nc; 1279 struct array_cache *nc;
1253 struct array_cache *shared; 1280 struct array_cache *shared;
@@ -1308,11 +1335,9 @@ free_array_cache:
1308 } 1335 }
1309 mutex_unlock(&cache_chain_mutex); 1336 mutex_unlock(&cache_chain_mutex);
1310 break; 1337 break;
1311#endif
1312 } 1338 }
1313 return NOTIFY_OK; 1339 return NOTIFY_OK;
1314bad: 1340bad:
1315 mutex_unlock(&cache_chain_mutex);
1316 return NOTIFY_BAD; 1341 return NOTIFY_BAD;
1317} 1342}
1318 1343
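
Taken together, the notifier changes make cache_chain_mutex double as protection for cpu_online_map: it is acquired in one hotplug event and released in a later one. Schematically (a simplified skeleton of cpuup_callback, with the allocation and teardown bodies elided):

	static int __cpuinit cpuup_callback(struct notifier_block *nfb,
					    unsigned long action, void *hcpu)
	{
		long cpu = (long)hcpu;

		switch (action) {
		case CPU_UP_PREPARE:
			mutex_lock(&cache_chain_mutex);
			/* allocate per-cpu and per-node arrays ... */
			break;
		case CPU_ONLINE:
			mutex_unlock(&cache_chain_mutex);	/* taken in CPU_UP_PREPARE */
			start_cpu_timer(cpu);
			break;
		case CPU_DOWN_PREPARE:
			mutex_lock(&cache_chain_mutex);		/* freeze cpu_online_map users */
			break;
		case CPU_DOWN_FAILED:
			mutex_unlock(&cache_chain_mutex);
			break;
		case CPU_UP_CANCELED:
		case CPU_DEAD:
			/* drain and free per-cpu data ... */
			mutex_unlock(&cache_chain_mutex);
			break;
		}
		return NOTIFY_OK;
	}

The unlock that used to sit on the bad: error path is gone on purpose: a failed CPU_UP_PREPARE is followed by CPU_UP_CANCELED, which now drops the mutex.
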
@@ -1580,12 +1605,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1580 flags |= __GFP_COMP; 1605 flags |= __GFP_COMP;
1581#endif 1606#endif
1582 1607
1583 /* 1608 flags |= cachep->gfpflags;
1584 * Under NUMA we want memory on the indicated node. We will handle
1585 * the needed fallback ourselves since we want to serve from our
1586 * per node object lists first for other nodes.
1587 */
1588 flags |= cachep->gfpflags | GFP_THISNODE;
1589 1609
1590 page = alloc_pages_node(nodeid, flags, cachep->gfporder); 1610 page = alloc_pages_node(nodeid, flags, cachep->gfporder);
1591 if (!page) 1611 if (!page)
@@ -2098,15 +2118,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2098 } 2118 }
2099 2119
2100 /* 2120 /*
2101 * Prevent CPUs from coming and going. 2121 * We use cache_chain_mutex to ensure a consistent view of
2102 * lock_cpu_hotplug() nests outside cache_chain_mutex 2122 * cpu_online_map as well. Please see cpuup_callback
2103 */ 2123 */
2104 lock_cpu_hotplug();
2105
2106 mutex_lock(&cache_chain_mutex); 2124 mutex_lock(&cache_chain_mutex);
2107 2125
2108 list_for_each_entry(pc, &cache_chain, next) { 2126 list_for_each_entry(pc, &cache_chain, next) {
2109 mm_segment_t old_fs = get_fs();
2110 char tmp; 2127 char tmp;
2111 int res; 2128 int res;
2112 2129
@@ -2115,9 +2132,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2115 * destroy its slab cache and no-one else reuses the vmalloc 2132 * destroy its slab cache and no-one else reuses the vmalloc
2116 * area of the module. Print a warning. 2133 * area of the module. Print a warning.
2117 */ 2134 */
2118 set_fs(KERNEL_DS); 2135 res = probe_kernel_address(pc->name, tmp);
2119 res = __get_user(tmp, pc->name);
2120 set_fs(old_fs);
2121 if (res) { 2136 if (res) {
2122 printk("SLAB: cache with size %d has lost its name\n", 2137 printk("SLAB: cache with size %d has lost its name\n",
2123 pc->buffer_size); 2138 pc->buffer_size);
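
probe_kernel_address() from <linux/uaccess.h> performs the same safe read that the set_fs(KERNEL_DS)/__get_user() pair used to, returning -EFAULT instead of faulting if the pointer has gone away. A minimal sketch of the idiom, with a hypothetical helper name:

	#include <linux/uaccess.h>

	/* Returns true if the first byte of 'name' can still be read, e.g. the
	 * owning module has not been unloaded underneath us. */
	static int name_still_readable(const char *name)
	{
		char tmp;

		return probe_kernel_address(name, tmp) == 0;
	}
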
@@ -2197,25 +2212,24 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2197 if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER) 2212 if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
2198 ralign = BYTES_PER_WORD; 2213 ralign = BYTES_PER_WORD;
2199 2214
2200 /* 2) arch mandated alignment: disables debug if necessary */ 2215 /* 2) arch mandated alignment */
2201 if (ralign < ARCH_SLAB_MINALIGN) { 2216 if (ralign < ARCH_SLAB_MINALIGN) {
2202 ralign = ARCH_SLAB_MINALIGN; 2217 ralign = ARCH_SLAB_MINALIGN;
2203 if (ralign > BYTES_PER_WORD)
2204 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2205 } 2218 }
2206 /* 3) caller mandated alignment: disables debug if necessary */ 2219 /* 3) caller mandated alignment */
2207 if (ralign < align) { 2220 if (ralign < align) {
2208 ralign = align; 2221 ralign = align;
2209 if (ralign > BYTES_PER_WORD)
2210 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2211 } 2222 }
2223 /* disable debug if necessary */
2224 if (ralign > BYTES_PER_WORD)
2225 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2212 /* 2226 /*
2213 * 4) Store it. 2227 * 4) Store it.
2214 */ 2228 */
2215 align = ralign; 2229 align = ralign;
2216 2230
2217 /* Get cache's description obj. */ 2231 /* Get cache's description obj. */
2218 cachep = kmem_cache_zalloc(&cache_cache, SLAB_KERNEL); 2232 cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
2219 if (!cachep) 2233 if (!cachep)
2220 goto oops; 2234 goto oops;
2221 2235
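
Because the old and new lines interleave above, here is the resulting alignment sequence in one piece (condensed; step 1 and the HWCACHE_ALIGN handling lie outside this hunk). The largest required alignment wins, and only afterwards is a single check made to drop the word-sized debug fields:

	/* 2) arch mandated alignment */
	if (ralign < ARCH_SLAB_MINALIGN)
		ralign = ARCH_SLAB_MINALIGN;
	/* 3) caller mandated alignment */
	if (ralign < align)
		ralign = align;
	/* disable debug if necessary: red zones and user tracking need word alignment */
	if (ralign > BYTES_PER_WORD)
		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
	/* 4) store it */
	align = ralign;
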
@@ -2326,7 +2340,6 @@ oops:
2326 panic("kmem_cache_create(): failed to create slab `%s'\n", 2340 panic("kmem_cache_create(): failed to create slab `%s'\n",
2327 name); 2341 name);
2328 mutex_unlock(&cache_chain_mutex); 2342 mutex_unlock(&cache_chain_mutex);
2329 unlock_cpu_hotplug();
2330 return cachep; 2343 return cachep;
2331} 2344}
2332EXPORT_SYMBOL(kmem_cache_create); 2345EXPORT_SYMBOL(kmem_cache_create);
@@ -2444,6 +2457,7 @@ out:
2444 return nr_freed; 2457 return nr_freed;
2445} 2458}
2446 2459
2460/* Called with cache_chain_mutex held to protect against cpu hotplug */
2447static int __cache_shrink(struct kmem_cache *cachep) 2461static int __cache_shrink(struct kmem_cache *cachep)
2448{ 2462{
2449 int ret = 0, i = 0; 2463 int ret = 0, i = 0;
@@ -2474,9 +2488,13 @@ static int __cache_shrink(struct kmem_cache *cachep)
2474 */ 2488 */
2475int kmem_cache_shrink(struct kmem_cache *cachep) 2489int kmem_cache_shrink(struct kmem_cache *cachep)
2476{ 2490{
2491 int ret;
2477 BUG_ON(!cachep || in_interrupt()); 2492 BUG_ON(!cachep || in_interrupt());
2478 2493
2479 return __cache_shrink(cachep); 2494 mutex_lock(&cache_chain_mutex);
2495 ret = __cache_shrink(cachep);
2496 mutex_unlock(&cache_chain_mutex);
2497 return ret;
2480} 2498}
2481EXPORT_SYMBOL(kmem_cache_shrink); 2499EXPORT_SYMBOL(kmem_cache_shrink);
2482 2500
@@ -2500,23 +2518,16 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
2500{ 2518{
2501 BUG_ON(!cachep || in_interrupt()); 2519 BUG_ON(!cachep || in_interrupt());
2502 2520
2503 /* Don't let CPUs to come and go */
2504 lock_cpu_hotplug();
2505
2506 /* Find the cache in the chain of caches. */ 2521 /* Find the cache in the chain of caches. */
2507 mutex_lock(&cache_chain_mutex); 2522 mutex_lock(&cache_chain_mutex);
2508 /* 2523 /*
2509 * the chain is never empty, cache_cache is never destroyed 2524 * the chain is never empty, cache_cache is never destroyed
2510 */ 2525 */
2511 list_del(&cachep->next); 2526 list_del(&cachep->next);
2512 mutex_unlock(&cache_chain_mutex);
2513
2514 if (__cache_shrink(cachep)) { 2527 if (__cache_shrink(cachep)) {
2515 slab_error(cachep, "Can't free all objects"); 2528 slab_error(cachep, "Can't free all objects");
2516 mutex_lock(&cache_chain_mutex);
2517 list_add(&cachep->next, &cache_chain); 2529 list_add(&cachep->next, &cache_chain);
2518 mutex_unlock(&cache_chain_mutex); 2530 mutex_unlock(&cache_chain_mutex);
2519 unlock_cpu_hotplug();
2520 return; 2531 return;
2521 } 2532 }
2522 2533
@@ -2524,7 +2535,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
2524 synchronize_rcu(); 2535 synchronize_rcu();
2525 2536
2526 __kmem_cache_destroy(cachep); 2537 __kmem_cache_destroy(cachep);
2527 unlock_cpu_hotplug(); 2538 mutex_unlock(&cache_chain_mutex);
2528} 2539}
2529EXPORT_SYMBOL(kmem_cache_destroy); 2540EXPORT_SYMBOL(kmem_cache_destroy);
2530 2541
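
The destroy path reads more clearly in its final form: cache_chain_mutex (rather than lock_cpu_hotplug()) is now held from the list removal all the way through the final teardown. Condensed, with the debug checks and the RCU grace-period case reduced to a comment:

	mutex_lock(&cache_chain_mutex);
	list_del(&cachep->next);	/* the chain is never empty: cache_cache stays */
	if (__cache_shrink(cachep)) {
		slab_error(cachep, "Can't free all objects");
		list_add(&cachep->next, &cache_chain);
		mutex_unlock(&cache_chain_mutex);
		return;
	}
	/* ... wait for an RCU grace period for SLAB_DESTROY_BY_RCU caches ... */
	__kmem_cache_destroy(cachep);
	mutex_unlock(&cache_chain_mutex);
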
@@ -2548,7 +2559,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2548 if (OFF_SLAB(cachep)) { 2559 if (OFF_SLAB(cachep)) {
2549 /* Slab management obj is off-slab. */ 2560 /* Slab management obj is off-slab. */
2550 slabp = kmem_cache_alloc_node(cachep->slabp_cache, 2561 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2551 local_flags, nodeid); 2562 local_flags & ~GFP_THISNODE, nodeid);
2552 if (!slabp) 2563 if (!slabp)
2553 return NULL; 2564 return NULL;
2554 } else { 2565 } else {
@@ -2618,7 +2629,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
2618 2629
2619static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) 2630static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2620{ 2631{
2621 if (flags & SLAB_DMA) 2632 if (flags & GFP_DMA)
2622 BUG_ON(!(cachep->gfpflags & GFP_DMA)); 2633 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2623 else 2634 else
2624 BUG_ON(cachep->gfpflags & GFP_DMA); 2635 BUG_ON(cachep->gfpflags & GFP_DMA);
@@ -2689,10 +2700,10 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2689 * Grow (by 1) the number of slabs within a cache. This is called by 2700 * Grow (by 1) the number of slabs within a cache. This is called by
2690 * kmem_cache_alloc() when there are no active objs left in a cache. 2701 * kmem_cache_alloc() when there are no active objs left in a cache.
2691 */ 2702 */
2692static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) 2703static int cache_grow(struct kmem_cache *cachep,
2704 gfp_t flags, int nodeid, void *objp)
2693{ 2705{
2694 struct slab *slabp; 2706 struct slab *slabp;
2695 void *objp;
2696 size_t offset; 2707 size_t offset;
2697 gfp_t local_flags; 2708 gfp_t local_flags;
2698 unsigned long ctor_flags; 2709 unsigned long ctor_flags;
@@ -2702,12 +2713,12 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
2702 * Be lazy and only check for valid flags here, keeping it out of the 2713 * Be lazy and only check for valid flags here, keeping it out of the
2703 * critical path in kmem_cache_alloc(). 2714 * critical path in kmem_cache_alloc().
2704 */ 2715 */
2705 BUG_ON(flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)); 2716 BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK | __GFP_NO_GROW));
2706 if (flags & SLAB_NO_GROW) 2717 if (flags & __GFP_NO_GROW)
2707 return 0; 2718 return 0;
2708 2719
2709 ctor_flags = SLAB_CTOR_CONSTRUCTOR; 2720 ctor_flags = SLAB_CTOR_CONSTRUCTOR;
2710 local_flags = (flags & SLAB_LEVEL_MASK); 2721 local_flags = (flags & GFP_LEVEL_MASK);
2711 if (!(local_flags & __GFP_WAIT)) 2722 if (!(local_flags & __GFP_WAIT))
2712 /* 2723 /*
2713 * Not allowed to sleep. Need to tell a constructor about 2724 * Not allowed to sleep. Need to tell a constructor about
@@ -2744,12 +2755,14 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
2744 * Get mem for the objs. Attempt to allocate a physical page from 2755 * Get mem for the objs. Attempt to allocate a physical page from
2745 * 'nodeid'. 2756 * 'nodeid'.
2746 */ 2757 */
2747 objp = kmem_getpages(cachep, flags, nodeid); 2758 if (!objp)
2759 objp = kmem_getpages(cachep, flags, nodeid);
2748 if (!objp) 2760 if (!objp)
2749 goto failed; 2761 goto failed;
2750 2762
2751 /* Get slab management. */ 2763 /* Get slab management. */
2752 slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid); 2764 slabp = alloc_slabmgmt(cachep, objp, offset,
2765 local_flags & ~GFP_THISNODE, nodeid);
2753 if (!slabp) 2766 if (!slabp)
2754 goto opps1; 2767 goto opps1;
2755 2768
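
cache_grow() now accepts the backing pages as a fourth argument, so it can be driven in two ways; both call styles appear later in this patch (cache_alloc_refill and fallback_alloc). A condensed sketch, with a hypothetical error label:

	/* Normal refill: let cache_grow() allocate pages itself, pinned to 'node'. */
	if (!cache_grow(cachep, flags | GFP_THISNODE, node, NULL))
		goto failed;

	/* Fallback: pages were already allocated wherever the page allocator
	 * chose; hand them in so the slab lands on the matching nodelist. */
	nid = page_to_nid(virt_to_page(objp));
	if (!cache_grow(cachep, flags, nid, objp))
		kmem_freepages(cachep, objp);
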
@@ -2987,7 +3000,7 @@ alloc_done:
2987 3000
2988 if (unlikely(!ac->avail)) { 3001 if (unlikely(!ac->avail)) {
2989 int x; 3002 int x;
2990 x = cache_grow(cachep, flags, node); 3003 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
2991 3004
2992 /* cache_grow can reenable interrupts, then ac could change. */ 3005 /* cache_grow can reenable interrupts, then ac could change. */
2993 ac = cpu_cache_get(cachep); 3006 ac = cpu_cache_get(cachep);
@@ -3063,6 +3076,12 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3063 3076
3064 cachep->ctor(objp, cachep, ctor_flags); 3077 cachep->ctor(objp, cachep, ctor_flags);
3065 } 3078 }
3079#if ARCH_SLAB_MINALIGN
3080 if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
3081 printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
3082 objp, ARCH_SLAB_MINALIGN);
3083 }
3084#endif
3066 return objp; 3085 return objp;
3067} 3086}
3068#else 3087#else
@@ -3105,10 +3124,10 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
3105 objp = ____cache_alloc(cachep, flags); 3124 objp = ____cache_alloc(cachep, flags);
3106 /* 3125 /*
3107 * We may just have run out of memory on the local node. 3126 * We may just have run out of memory on the local node.
3108 * __cache_alloc_node() knows how to locate memory on other nodes 3127 * ____cache_alloc_node() knows how to locate memory on other nodes
3109 */ 3128 */
3110 if (NUMA_BUILD && !objp) 3129 if (NUMA_BUILD && !objp)
3111 objp = __cache_alloc_node(cachep, flags, numa_node_id()); 3130 objp = ____cache_alloc_node(cachep, flags, numa_node_id());
3112 local_irq_restore(save_flags); 3131 local_irq_restore(save_flags);
3113 objp = cache_alloc_debugcheck_after(cachep, flags, objp, 3132 objp = cache_alloc_debugcheck_after(cachep, flags, objp,
3114 caller); 3133 caller);
@@ -3135,15 +3154,17 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3135 else if (current->mempolicy) 3154 else if (current->mempolicy)
3136 nid_alloc = slab_node(current->mempolicy); 3155 nid_alloc = slab_node(current->mempolicy);
3137 if (nid_alloc != nid_here) 3156 if (nid_alloc != nid_here)
3138 return __cache_alloc_node(cachep, flags, nid_alloc); 3157 return ____cache_alloc_node(cachep, flags, nid_alloc);
3139 return NULL; 3158 return NULL;
3140} 3159}
3141 3160
3142/* 3161/*
3143 * Fallback function if there was no memory available and no objects on a 3162 * Fallback function if there was no memory available and no objects on a
3144 * certain node and we are allowed to fall back. We mimick the behavior of 3163 * certain node and fall back is permitted. First we scan all the
3145 * the page allocator. We fall back according to a zonelist determined by 3164 * available nodelists for available objects. If that fails then we
3146 * the policy layer while obeying cpuset constraints. 3165 * perform an allocation without specifying a node. This allows the page
3166 * allocator to do its reclaim / fallback magic. We then insert the
3167 * slab into the proper nodelist and then allocate from it.
3147 */ 3168 */
3148void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) 3169void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3149{ 3170{
@@ -3151,15 +3172,51 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3151 ->node_zonelists[gfp_zone(flags)]; 3172 ->node_zonelists[gfp_zone(flags)];
3152 struct zone **z; 3173 struct zone **z;
3153 void *obj = NULL; 3174 void *obj = NULL;
3175 int nid;
3154 3176
3177retry:
3178 /*
3179 * Look through allowed nodes for objects available
3180 * from existing per node queues.
3181 */
3155 for (z = zonelist->zones; *z && !obj; z++) { 3182 for (z = zonelist->zones; *z && !obj; z++) {
3156 int nid = zone_to_nid(*z); 3183 nid = zone_to_nid(*z);
3184
3185 if (cpuset_zone_allowed(*z, flags) &&
3186 cache->nodelists[nid] &&
3187 cache->nodelists[nid]->free_objects)
3188 obj = ____cache_alloc_node(cache,
3189 flags | GFP_THISNODE, nid);
3190 }
3157 3191
3158 if (zone_idx(*z) <= ZONE_NORMAL && 3192 if (!obj) {
3159 cpuset_zone_allowed(*z, flags) && 3193 /*
3160 cache->nodelists[nid]) 3194 * This allocation will be performed within the constraints
3161 obj = __cache_alloc_node(cache, 3195 * of the current cpuset / memory policy requirements.
3162 flags | __GFP_THISNODE, nid); 3196 * We may trigger various forms of reclaim on the allowed
3197 * set and go into memory reserves if necessary.
3198 */
3199 obj = kmem_getpages(cache, flags, -1);
3200 if (obj) {
3201 /*
3202 * Insert into the appropriate per node queues
3203 */
3204 nid = page_to_nid(virt_to_page(obj));
3205 if (cache_grow(cache, flags, nid, obj)) {
3206 obj = ____cache_alloc_node(cache,
3207 flags | GFP_THISNODE, nid);
3208 if (!obj)
3209 /*
3210 * Another processor may allocate the
3211 * objects in the slab since we are
3212 * not holding any locks.
3213 */
3214 goto retry;
3215 } else {
3216 kmem_freepages(cache, obj);
3217 obj = NULL;
3218 }
3219 }
3163 } 3220 }
3164 return obj; 3221 return obj;
3165} 3222}
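
Since the new body is interleaved with the removed one above, here is the resulting fallback_alloc() control flow in one piece (the zonelist initialisation lies outside the hunk and is only noted in a comment):

	void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
	{
		struct zonelist *zonelist;	/* selected by the memory policy layer */
		struct zone **z;
		void *obj = NULL;
		int nid;

	retry:
		/* 1) Any allowed node that already has free objects queued? */
		for (z = zonelist->zones; *z && !obj; z++) {
			nid = zone_to_nid(*z);
			if (cpuset_zone_allowed(*z, flags) &&
			    cache->nodelists[nid] &&
			    cache->nodelists[nid]->free_objects)
				obj = ____cache_alloc_node(cache,
						flags | GFP_THISNODE, nid);
		}

		if (!obj) {
			/* 2) Let the page allocator pick a node, with its usual
			 *    reclaim / fallback behaviour. */
			obj = kmem_getpages(cache, flags, -1);
			if (obj) {
				/* 3) Thread the fresh slab onto that node's
				 *    lists and allocate from it. */
				nid = page_to_nid(virt_to_page(obj));
				if (cache_grow(cache, flags, nid, obj)) {
					obj = ____cache_alloc_node(cache,
							flags | GFP_THISNODE, nid);
					if (!obj)
						/* another cpu raced us to the
						 * fresh objects; start over */
						goto retry;
				} else {
					kmem_freepages(cache, obj);
					obj = NULL;
				}
			}
		}
		return obj;
	}
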
@@ -3167,7 +3224,7 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3167/* 3224/*
3168 * A interface to enable slab creation on nodeid 3225 * A interface to enable slab creation on nodeid
3169 */ 3226 */
3170static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, 3227static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3171 int nodeid) 3228 int nodeid)
3172{ 3229{
3173 struct list_head *entry; 3230 struct list_head *entry;
@@ -3216,7 +3273,7 @@ retry:
3216 3273
3217must_grow: 3274must_grow:
3218 spin_unlock(&l3->list_lock); 3275 spin_unlock(&l3->list_lock);
3219 x = cache_grow(cachep, flags, nodeid); 3276 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3220 if (x) 3277 if (x)
3221 goto retry; 3278 goto retry;
3222 3279
@@ -3434,35 +3491,59 @@ out:
3434 * @flags: See kmalloc(). 3491 * @flags: See kmalloc().
3435 * @nodeid: node number of the target node. 3492 * @nodeid: node number of the target node.
3436 * 3493 *
3437 * Identical to kmem_cache_alloc, except that this function is slow 3494 * Identical to kmem_cache_alloc but it will allocate memory on the given
3438 * and can sleep. And it will allocate memory on the given node, which 3495 * node, which can improve the performance for cpu bound structures.
3439 * can improve the performance for cpu bound structures. 3496 *
3440 * New and improved: it will now make sure that the object gets 3497 * Fallback to other node is possible if __GFP_THISNODE is not set.
3441 * put on the correct node list so that there is no false sharing.
3442 */ 3498 */
3443void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) 3499static __always_inline void *
3500__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3501 int nodeid, void *caller)
3444{ 3502{
3445 unsigned long save_flags; 3503 unsigned long save_flags;
3446 void *ptr; 3504 void *ptr = NULL;
3447 3505
3448 cache_alloc_debugcheck_before(cachep, flags); 3506 cache_alloc_debugcheck_before(cachep, flags);
3449 local_irq_save(save_flags); 3507 local_irq_save(save_flags);
3450 3508
3451 if (nodeid == -1 || nodeid == numa_node_id() || 3509 if (unlikely(nodeid == -1))
3452 !cachep->nodelists[nodeid]) 3510 nodeid = numa_node_id();
3453 ptr = ____cache_alloc(cachep, flags); 3511
3454 else 3512 if (likely(cachep->nodelists[nodeid])) {
3455 ptr = __cache_alloc_node(cachep, flags, nodeid); 3513 if (nodeid == numa_node_id()) {
3456 local_irq_restore(save_flags); 3514 /*
3515 * Use the locally cached objects if possible.
3516 * However ____cache_alloc does not allow fallback
3517 * to other nodes. It may fail while we still have
3518 * objects on other nodes available.
3519 */
3520 ptr = ____cache_alloc(cachep, flags);
3521 }
3522 if (!ptr) {
3523 /* ___cache_alloc_node can fall back to other nodes */
3524 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3525 }
3526 } else {
3527 /* Node not bootstrapped yet */
3528 if (!(flags & __GFP_THISNODE))
3529 ptr = fallback_alloc(cachep, flags);
3530 }
3457 3531
3458 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, 3532 local_irq_restore(save_flags);
3459 __builtin_return_address(0)); 3533 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3460 3534
3461 return ptr; 3535 return ptr;
3462} 3536}
3537
3538void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3539{
3540 return __cache_alloc_node(cachep, flags, nodeid,
3541 __builtin_return_address(0));
3542}
3463EXPORT_SYMBOL(kmem_cache_alloc_node); 3543EXPORT_SYMBOL(kmem_cache_alloc_node);
3464 3544
3465void *__kmalloc_node(size_t size, gfp_t flags, int node) 3545static __always_inline void *
3546__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3466{ 3547{
3467 struct kmem_cache *cachep; 3548 struct kmem_cache *cachep;
3468 3549
@@ -3471,8 +3552,29 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
3471 return NULL; 3552 return NULL;
3472 return kmem_cache_alloc_node(cachep, flags, node); 3553 return kmem_cache_alloc_node(cachep, flags, node);
3473} 3554}
3555
3556#ifdef CONFIG_DEBUG_SLAB
3557void *__kmalloc_node(size_t size, gfp_t flags, int node)
3558{
3559 return __do_kmalloc_node(size, flags, node,
3560 __builtin_return_address(0));
3561}
3474EXPORT_SYMBOL(__kmalloc_node); 3562EXPORT_SYMBOL(__kmalloc_node);
3475#endif 3563
3564void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
3565 int node, void *caller)
3566{
3567 return __do_kmalloc_node(size, flags, node, caller);
3568}
3569EXPORT_SYMBOL(__kmalloc_node_track_caller);
3570#else
3571void *__kmalloc_node(size_t size, gfp_t flags, int node)
3572{
3573 return __do_kmalloc_node(size, flags, node, NULL);
3574}
3575EXPORT_SYMBOL(__kmalloc_node);
3576#endif /* CONFIG_DEBUG_SLAB */
3577#endif /* CONFIG_NUMA */
3476 3578
3477/** 3579/**
3478 * __do_kmalloc - allocate memory 3580 * __do_kmalloc - allocate memory
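
From a caller's point of view only the documented semantics change: the allocation prefers the given node but may fall back unless __GFP_THISNODE is passed. A typical use, where my_cachep is a hypothetical cache:

	struct my_struct *p;

	/* Place the object on the node that will mostly touch it; the slab
	 * allocator may still serve it from another node under pressure. */
	p = kmem_cache_alloc_node(my_cachep, GFP_KERNEL, cpu_to_node(cpu));
	if (!p)
		return -ENOMEM;

	/* To insist on the node (accepting failure instead of fallback): */
	p = kmem_cache_alloc_node(my_cachep, GFP_KERNEL | __GFP_THISNODE, node);

The same applies to kmalloc_node(), which resolves to __kmalloc_node() above for non-constant sizes.
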
@@ -3583,13 +3685,15 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
3583 int node; 3685 int node;
3584 struct kmem_list3 *l3; 3686 struct kmem_list3 *l3;
3585 struct array_cache *new_shared; 3687 struct array_cache *new_shared;
3586 struct array_cache **new_alien; 3688 struct array_cache **new_alien = NULL;
3587 3689
3588 for_each_online_node(node) { 3690 for_each_online_node(node) {
3589 3691
3590 new_alien = alloc_alien_cache(node, cachep->limit); 3692 if (use_alien_caches) {
3591 if (!new_alien) 3693 new_alien = alloc_alien_cache(node, cachep->limit);
3592 goto fail; 3694 if (!new_alien)
3695 goto fail;
3696 }
3593 3697
3594 new_shared = alloc_arraycache(node, 3698 new_shared = alloc_arraycache(node,
3595 cachep->shared*cachep->batchcount, 3699 cachep->shared*cachep->batchcount,
@@ -3815,7 +3919,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
3815 * If we cannot acquire the cache chain mutex then just give up - we'll try 3919 * If we cannot acquire the cache chain mutex then just give up - we'll try
3816 * again on the next iteration. 3920 * again on the next iteration.
3817 */ 3921 */
3818static void cache_reap(void *unused) 3922static void cache_reap(struct work_struct *unused)
3819{ 3923{
3820 struct kmem_cache *searchp; 3924 struct kmem_cache *searchp;
3821 struct kmem_list3 *l3; 3925 struct kmem_list3 *l3;
@@ -4038,7 +4142,7 @@ static int s_show(struct seq_file *m, void *p)
4038 * + further values on SMP and with statistics enabled 4142 * + further values on SMP and with statistics enabled
4039 */ 4143 */
4040 4144
4041struct seq_operations slabinfo_op = { 4145const struct seq_operations slabinfo_op = {
4042 .start = s_start, 4146 .start = s_start,
4043 .next = s_next, 4147 .next = s_next,
4044 .stop = s_stop, 4148 .stop = s_stop,
@@ -4236,7 +4340,7 @@ static int leaks_show(struct seq_file *m, void *p)
4236 return 0; 4340 return 0;
4237} 4341}
4238 4342
4239struct seq_operations slabstats_op = { 4343const struct seq_operations slabstats_op = {
4240 .start = leaks_start, 4344 .start = leaks_start,
4241 .next = s_next, 4345 .next = s_next,
4242 .stop = s_stop, 4346 .stop = s_stop,