author		Dave Jones <davej@redhat.com>	2006-12-12 17:41:41 -0500
committer	Dave Jones <davej@redhat.com>	2006-12-12 17:41:41 -0500
commit		c4366889dda8110247be59ca41fddb82951a8c26 (patch)
tree		705c1a996bed8fd48ce94ff33ec9fd00f9b94875 /mm/slab.c
parent		db2fb9db5735cc532fd4fc55e94b9a3c3750378e (diff)
parent		e1036502e5263851259d147771226161e5ccc85a (diff)

Merge ../linus

Conflicts:
	drivers/cpufreq/cpufreq.c
Diffstat (limited to 'mm/slab.c')
-rw-r--r--	mm/slab.c	389
1 file changed, 288 insertions, 101 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 266449d604..2c655532f5 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -103,12 +103,13 @@
 #include	<linux/module.h>
 #include	<linux/rcupdate.h>
 #include	<linux/string.h>
+#include	<linux/uaccess.h>
 #include	<linux/nodemask.h>
 #include	<linux/mempolicy.h>
 #include	<linux/mutex.h>
+#include	<linux/fault-inject.h>
 #include	<linux/rtmutex.h>
 
-#include	<asm/uaccess.h>
 #include	<asm/cacheflush.h>
 #include	<asm/tlbflush.h>
 #include	<asm/page.h>
@@ -313,7 +314,7 @@ static int drain_freelist(struct kmem_cache *cache,
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
 			int node);
 static int enable_cpucache(struct kmem_cache *cachep);
-static void cache_reap(void *unused);
+static void cache_reap(struct work_struct *unused);
 
 /*
  * This function must be completely optimized away if a constant is passed to
@@ -730,7 +731,10 @@ static inline void init_lock_keys(void)
 }
 #endif
 
-/* Guard access to the cache-chain. */
+/*
+ * 1. Guard access to the cache-chain.
+ * 2. Protect sanity of cpu_online_map against cpu hotplug events
+ */
 static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
 
@@ -753,7 +757,7 @@ int slab_is_available(void)
 	return g_cpucache_up == FULL;
 }
 
-static DEFINE_PER_CPU(struct work_struct, reap_work);
+static DEFINE_PER_CPU(struct delayed_work, reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
@@ -866,6 +870,22 @@ static void __slab_error(const char *function, struct kmem_cache *cachep,
 	dump_stack();
 }
 
+/*
+ * By default on NUMA we use alien caches to stage the freeing of
+ * objects allocated from other nodes. This causes massive memory
+ * inefficiencies when using fake NUMA setup to split memory into a
+ * large number of small nodes, so it can be disabled on the command
+ * line
+ */
+
+static int use_alien_caches __read_mostly = 1;
+static int __init noaliencache_setup(char *s)
+{
+	use_alien_caches = 0;
+	return 1;
+}
+__setup("noaliencache", noaliencache_setup);
+
 #ifdef CONFIG_NUMA
 /*
  * Special reaping functions for NUMA systems called from cache_reap().
@@ -883,7 +903,7 @@ static void init_reap_node(int cpu)
 	if (node == MAX_NUMNODES)
 		node = first_node(node_online_map);
 
-	__get_cpu_var(reap_node) = node;
+	per_cpu(reap_node, cpu) = node;
 }
 
 static void next_reap_node(void)
@@ -916,17 +936,18 @@ static void next_reap_node(void)
  */
 static void __devinit start_cpu_timer(int cpu)
 {
-	struct work_struct *reap_work = &per_cpu(reap_work, cpu);
+	struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
 
 	/*
 	 * When this gets called from do_initcalls via cpucache_init(),
 	 * init_workqueues() has already run, so keventd will be setup
 	 * at that time.
 	 */
-	if (keventd_up() && reap_work->func == NULL) {
+	if (keventd_up() && reap_work->work.func == NULL) {
 		init_reap_node(cpu);
-		INIT_WORK(reap_work, cache_reap, NULL);
-		schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
+		INIT_DELAYED_WORK(reap_work, cache_reap);
+		schedule_delayed_work_on(cpu, reap_work,
+					__round_jiffies_relative(HZ, cpu));
 	}
 }
 
@@ -996,7 +1017,7 @@ static inline void *alternate_node_alloc(struct kmem_cache *cachep,
 	return NULL;
 }
 
-static inline void *__cache_alloc_node(struct kmem_cache *cachep,
+static inline void *____cache_alloc_node(struct kmem_cache *cachep,
 		 gfp_t flags, int nodeid)
 {
 	return NULL;
@@ -1004,7 +1025,7 @@ static inline void *__cache_alloc_node(struct kmem_cache *cachep,
 
 #else	/* CONFIG_NUMA */
 
-static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
+static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
 static struct array_cache **alloc_alien_cache(int node, int limit)
@@ -1114,7 +1135,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 	 * Make sure we are not freeing a object from another node to the array
 	 * cache on this cpu.
 	 */
-	if (likely(slabp->nodeid == node))
+	if (likely(slabp->nodeid == node) || unlikely(!use_alien_caches))
 		return 0;
 
 	l3 = cachep->nodelists[node];
@@ -1192,7 +1213,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
 			struct array_cache *shared;
-			struct array_cache **alien;
+			struct array_cache **alien = NULL;
 
 			nc = alloc_arraycache(node, cachep->limit,
 						cachep->batchcount);
@@ -1204,9 +1225,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 			if (!shared)
 				goto bad;
 
-			alien = alloc_alien_cache(node, cachep->limit);
-			if (!alien)
-				goto bad;
+			if (use_alien_caches) {
+				alien = alloc_alien_cache(node, cachep->limit);
+				if (!alien)
+					goto bad;
+			}
 			cachep->array[cpu] = nc;
 			l3 = cachep->nodelists[node];
 			BUG_ON(!l3);
@@ -1230,12 +1253,18 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 			kfree(shared);
 			free_alien_cache(alien);
 		}
-		mutex_unlock(&cache_chain_mutex);
 		break;
 	case CPU_ONLINE:
+		mutex_unlock(&cache_chain_mutex);
 		start_cpu_timer(cpu);
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
+	case CPU_DOWN_PREPARE:
+		mutex_lock(&cache_chain_mutex);
+		break;
+	case CPU_DOWN_FAILED:
+		mutex_unlock(&cache_chain_mutex);
+		break;
 	case CPU_DEAD:
 		/*
 		 * Even if all the cpus of a node are down, we don't free the
@@ -1246,8 +1275,8 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 		 * gets destroyed at kmem_cache_destroy().
 		 */
 		/* fall thru */
+#endif
 	case CPU_UP_CANCELED:
-		mutex_lock(&cache_chain_mutex);
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
 			struct array_cache *shared;
@@ -1308,11 +1337,9 @@ free_array_cache:
 	}
 	mutex_unlock(&cache_chain_mutex);
 	break;
-#endif
 	}
 	return NOTIFY_OK;
 bad:
-	mutex_unlock(&cache_chain_mutex);
 	return NOTIFY_BAD;
 }
 
@@ -1580,12 +1607,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	flags |= __GFP_COMP;
 #endif
 
-	/*
-	 * Under NUMA we want memory on the indicated node. We will handle
-	 * the needed fallback ourselves since we want to serve from our
-	 * per node object lists first for other nodes.
-	 */
-	flags |= cachep->gfpflags | GFP_THISNODE;
+	flags |= cachep->gfpflags;
 
 	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
 	if (!page)
@@ -2098,15 +2120,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	}
 
 	/*
-	 * Prevent CPUs from coming and going.
-	 * lock_cpu_hotplug() nests outside cache_chain_mutex
+	 * We use cache_chain_mutex to ensure a consistent view of
+	 * cpu_online_map as well. Please see cpuup_callback
 	 */
-	lock_cpu_hotplug();
-
 	mutex_lock(&cache_chain_mutex);
 
 	list_for_each_entry(pc, &cache_chain, next) {
-		mm_segment_t old_fs = get_fs();
 		char tmp;
 		int res;
 
@@ -2115,9 +2134,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		 * destroy its slab cache and no-one else reuses the vmalloc
 		 * area of the module. Print a warning.
 		 */
-		set_fs(KERNEL_DS);
-		res = __get_user(tmp, pc->name);
-		set_fs(old_fs);
+		res = probe_kernel_address(pc->name, tmp);
 		if (res) {
 			printk("SLAB: cache with size %d has lost its name\n",
 			       pc->buffer_size);
@@ -2197,25 +2214,24 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
 		ralign = BYTES_PER_WORD;
 
-	/* 2) arch mandated alignment: disables debug if necessary */
+	/* 2) arch mandated alignment */
 	if (ralign < ARCH_SLAB_MINALIGN) {
 		ralign = ARCH_SLAB_MINALIGN;
-		if (ralign > BYTES_PER_WORD)
-			flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
 	}
-	/* 3) caller mandated alignment: disables debug if necessary */
+	/* 3) caller mandated alignment */
 	if (ralign < align) {
 		ralign = align;
-		if (ralign > BYTES_PER_WORD)
-			flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
 	}
+	/* disable debug if necessary */
+	if (ralign > BYTES_PER_WORD)
+		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
 	/*
 	 * 4) Store it.
 	 */
 	align = ralign;
 
 	/* Get cache's description obj. */
-	cachep = kmem_cache_zalloc(&cache_cache, SLAB_KERNEL);
+	cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
 	if (!cachep)
 		goto oops;
 
@@ -2326,7 +2342,6 @@ oops:
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
 		      name);
 	mutex_unlock(&cache_chain_mutex);
-	unlock_cpu_hotplug();
 	return cachep;
 }
 EXPORT_SYMBOL(kmem_cache_create);
@@ -2444,6 +2459,7 @@ out:
 	return nr_freed;
 }
 
+/* Called with cache_chain_mutex held to protect against cpu hotplug */
 static int __cache_shrink(struct kmem_cache *cachep)
 {
 	int ret = 0, i = 0;
@@ -2474,9 +2490,13 @@ static int __cache_shrink(struct kmem_cache *cachep)
  */
 int kmem_cache_shrink(struct kmem_cache *cachep)
 {
+	int ret;
 	BUG_ON(!cachep || in_interrupt());
 
-	return __cache_shrink(cachep);
+	mutex_lock(&cache_chain_mutex);
+	ret = __cache_shrink(cachep);
+	mutex_unlock(&cache_chain_mutex);
+	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_shrink);
 
@@ -2500,23 +2520,16 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
 {
 	BUG_ON(!cachep || in_interrupt());
 
-	/* Don't let CPUs to come and go */
-	lock_cpu_hotplug();
-
 	/* Find the cache in the chain of caches. */
 	mutex_lock(&cache_chain_mutex);
 	/*
 	 * the chain is never empty, cache_cache is never destroyed
 	 */
 	list_del(&cachep->next);
-	mutex_unlock(&cache_chain_mutex);
-
 	if (__cache_shrink(cachep)) {
 		slab_error(cachep, "Can't free all objects");
-		mutex_lock(&cache_chain_mutex);
 		list_add(&cachep->next, &cache_chain);
 		mutex_unlock(&cache_chain_mutex);
-		unlock_cpu_hotplug();
 		return;
 	}
 
@@ -2524,7 +2537,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
 		synchronize_rcu();
 
 	__kmem_cache_destroy(cachep);
-	unlock_cpu_hotplug();
+	mutex_unlock(&cache_chain_mutex);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
@@ -2548,7 +2561,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 	if (OFF_SLAB(cachep)) {
 		/* Slab management obj is off-slab. */
 		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
-					      local_flags, nodeid);
+					      local_flags & ~GFP_THISNODE, nodeid);
 		if (!slabp)
 			return NULL;
 	} else {
@@ -2618,7 +2631,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
 
 static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
 {
-	if (flags & SLAB_DMA)
+	if (flags & GFP_DMA)
 		BUG_ON(!(cachep->gfpflags & GFP_DMA));
 	else
 		BUG_ON(cachep->gfpflags & GFP_DMA);
@@ -2689,10 +2702,10 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
  * Grow (by 1) the number of slabs within a cache. This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
-static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static int cache_grow(struct kmem_cache *cachep,
+		gfp_t flags, int nodeid, void *objp)
 {
 	struct slab *slabp;
-	void *objp;
 	size_t offset;
 	gfp_t local_flags;
 	unsigned long ctor_flags;
@@ -2702,12 +2715,12 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	 * Be lazy and only check for valid flags here, keeping it out of the
 	 * critical path in kmem_cache_alloc().
 	 */
-	BUG_ON(flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW));
-	if (flags & SLAB_NO_GROW)
+	BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK | __GFP_NO_GROW));
+	if (flags & __GFP_NO_GROW)
 		return 0;
 
 	ctor_flags = SLAB_CTOR_CONSTRUCTOR;
-	local_flags = (flags & SLAB_LEVEL_MASK);
+	local_flags = (flags & GFP_LEVEL_MASK);
 	if (!(local_flags & __GFP_WAIT))
 		/*
 		 * Not allowed to sleep. Need to tell a constructor about
@@ -2744,12 +2757,14 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	 * Get mem for the objs. Attempt to allocate a physical page from
 	 * 'nodeid'.
 	 */
-	objp = kmem_getpages(cachep, flags, nodeid);
+	if (!objp)
+		objp = kmem_getpages(cachep, flags, nodeid);
 	if (!objp)
 		goto failed;
 
 	/* Get slab management. */
-	slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid);
+	slabp = alloc_slabmgmt(cachep, objp, offset,
+			local_flags & ~GFP_THISNODE, nodeid);
 	if (!slabp)
 		goto opps1;
 
@@ -2987,7 +3002,7 @@ alloc_done:
 
 	if (unlikely(!ac->avail)) {
 		int x;
-		x = cache_grow(cachep, flags, node);
+		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
@@ -3063,18 +3078,101 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 
 		cachep->ctor(objp, cachep, ctor_flags);
 	}
+#if ARCH_SLAB_MINALIGN
+	if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
+		printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
+		       objp, ARCH_SLAB_MINALIGN);
+	}
+#endif
 	return objp;
 }
 #else
 #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
 #endif
 
+#ifdef CONFIG_FAILSLAB
+
+static struct failslab_attr {
+
+	struct fault_attr attr;
+
+	u32 ignore_gfp_wait;
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+	struct dentry *ignore_gfp_wait_file;
+#endif
+
+} failslab = {
+	.attr = FAULT_ATTR_INITIALIZER,
+	.ignore_gfp_wait = 1,
+};
+
+static int __init setup_failslab(char *str)
+{
+	return setup_fault_attr(&failslab.attr, str);
+}
+__setup("failslab=", setup_failslab);
+
+static int should_failslab(struct kmem_cache *cachep, gfp_t flags)
+{
+	if (cachep == &cache_cache)
+		return 0;
+	if (flags & __GFP_NOFAIL)
+		return 0;
+	if (failslab.ignore_gfp_wait && (flags & __GFP_WAIT))
+		return 0;
+
+	return should_fail(&failslab.attr, obj_size(cachep));
+}
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+static int __init failslab_debugfs(void)
+{
+	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+	struct dentry *dir;
+	int err;
+
+	err = init_fault_attr_dentries(&failslab.attr, "failslab");
+	if (err)
+		return err;
+	dir = failslab.attr.dentries.dir;
+
+	failslab.ignore_gfp_wait_file =
+		debugfs_create_bool("ignore-gfp-wait", mode, dir,
+				      &failslab.ignore_gfp_wait);
+
+	if (!failslab.ignore_gfp_wait_file) {
+		err = -ENOMEM;
+		debugfs_remove(failslab.ignore_gfp_wait_file);
+		cleanup_fault_attr_dentries(&failslab.attr);
+	}
+
+	return err;
+}
+
+late_initcall(failslab_debugfs);
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
+
+#else /* CONFIG_FAILSLAB */
+
+static inline int should_failslab(struct kmem_cache *cachep, gfp_t flags)
+{
+	return 0;
+}
+
+#endif /* CONFIG_FAILSLAB */
+
 static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
 	void *objp;
 	struct array_cache *ac;
 
 	check_irq_off();
+
+	if (should_failslab(cachep, flags))
+		return NULL;
+
 	ac = cpu_cache_get(cachep);
 	if (likely(ac->avail)) {
 		STATS_INC_ALLOCHIT(cachep);
@@ -3105,10 +3203,10 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
 	objp = ____cache_alloc(cachep, flags);
 	/*
 	 * We may just have run out of memory on the local node.
-	 * __cache_alloc_node() knows how to locate memory on other nodes
+	 * ____cache_alloc_node() knows how to locate memory on other nodes
 	 */
 	if (NUMA_BUILD && !objp)
-		objp = __cache_alloc_node(cachep, flags, numa_node_id());
+		objp = ____cache_alloc_node(cachep, flags, numa_node_id());
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp,
 					    caller);
@@ -3135,15 +3233,17 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
 	else if (current->mempolicy)
 		nid_alloc = slab_node(current->mempolicy);
 	if (nid_alloc != nid_here)
-		return __cache_alloc_node(cachep, flags, nid_alloc);
+		return ____cache_alloc_node(cachep, flags, nid_alloc);
 	return NULL;
 }
 
 /*
  * Fallback function if there was no memory available and no objects on a
- * certain node and we are allowed to fall back. We mimick the behavior of
- * the page allocator. We fall back according to a zonelist determined by
- * the policy layer while obeying cpuset constraints.
+ * certain node and fall back is permitted. First we scan all the
+ * available nodelists for available objects. If that fails then we
+ * perform an allocation without specifying a node. This allows the page
+ * allocator to do its reclaim / fallback magic. We then insert the
+ * slab into the proper nodelist and then allocate from it.
  */
 void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 {
@@ -3151,20 +3251,59 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 					->node_zonelists[gfp_zone(flags)];
 	struct zone **z;
 	void *obj = NULL;
+	int nid;
 
-	for (z = zonelist->zones; *z && !obj; z++)
-		if (zone_idx(*z) <= ZONE_NORMAL &&
-				cpuset_zone_allowed(*z, flags))
-			obj = __cache_alloc_node(cache,
-					flags | __GFP_THISNODE,
-					zone_to_nid(*z));
+retry:
+	/*
+	 * Look through allowed nodes for objects available
+	 * from existing per node queues.
+	 */
+	for (z = zonelist->zones; *z && !obj; z++) {
+		nid = zone_to_nid(*z);
+
+		if (cpuset_zone_allowed(*z, flags | __GFP_HARDWALL) &&
+			cache->nodelists[nid] &&
+			cache->nodelists[nid]->free_objects)
+				obj = ____cache_alloc_node(cache,
+					flags | GFP_THISNODE, nid);
+	}
+
+	if (!obj) {
+		/*
+		 * This allocation will be performed within the constraints
+		 * of the current cpuset / memory policy requirements.
+		 * We may trigger various forms of reclaim on the allowed
+		 * set and go into memory reserves if necessary.
+		 */
+		obj = kmem_getpages(cache, flags, -1);
+		if (obj) {
+			/*
+			 * Insert into the appropriate per node queues
+			 */
+			nid = page_to_nid(virt_to_page(obj));
+			if (cache_grow(cache, flags, nid, obj)) {
+				obj = ____cache_alloc_node(cache,
+					flags | GFP_THISNODE, nid);
+				if (!obj)
+					/*
+					 * Another processor may allocate the
+					 * objects in the slab since we are
+					 * not holding any locks.
+					 */
+					goto retry;
+			} else {
+				kmem_freepages(cache, obj);
+				obj = NULL;
+			}
+		}
+	}
 	return obj;
 }
 
 /*
  * A interface to enable slab creation on nodeid
  */
-static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
+static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
 				int nodeid)
 {
 	struct list_head *entry;
@@ -3213,7 +3352,7 @@ retry:
 
 must_grow:
 	spin_unlock(&l3->list_lock);
-	x = cache_grow(cachep, flags, nodeid);
+	x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
 	if (x)
 		goto retry;
 
@@ -3431,35 +3570,59 @@ out:
  * @flags: See kmalloc().
  * @nodeid: node number of the target node.
  *
- * Identical to kmem_cache_alloc, except that this function is slow
- * and can sleep. And it will allocate memory on the given node, which
- * can improve the performance for cpu bound structures.
- * New and improved: it will now make sure that the object gets
- * put on the correct node list so that there is no false sharing.
+ * Identical to kmem_cache_alloc but it will allocate memory on the given
+ * node, which can improve the performance for cpu bound structures.
+ *
+ * Fallback to other node is possible if __GFP_THISNODE is not set.
  */
-void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static __always_inline void *
+__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
+		int nodeid, void *caller)
 {
 	unsigned long save_flags;
-	void *ptr;
+	void *ptr = NULL;
 
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 
-	if (nodeid == -1 || nodeid == numa_node_id() ||
-			!cachep->nodelists[nodeid])
-		ptr = ____cache_alloc(cachep, flags);
-	else
-		ptr = __cache_alloc_node(cachep, flags, nodeid);
-	local_irq_restore(save_flags);
+	if (unlikely(nodeid == -1))
+		nodeid = numa_node_id();
 
-	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr,
-					   __builtin_return_address(0));
+	if (likely(cachep->nodelists[nodeid])) {
+		if (nodeid == numa_node_id()) {
+			/*
+			 * Use the locally cached objects if possible.
+			 * However ____cache_alloc does not allow fallback
+			 * to other nodes. It may fail while we still have
+			 * objects on other nodes available.
+			 */
+			ptr = ____cache_alloc(cachep, flags);
+		}
+		if (!ptr) {
+			/* ___cache_alloc_node can fall back to other nodes */
+			ptr = ____cache_alloc_node(cachep, flags, nodeid);
+		}
+	} else {
+		/* Node not bootstrapped yet */
+		if (!(flags & __GFP_THISNODE))
+			ptr = fallback_alloc(cachep, flags);
+	}
+
+	local_irq_restore(save_flags);
+	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
 
 	return ptr;
 }
+
+void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+{
+	return __cache_alloc_node(cachep, flags, nodeid,
+			__builtin_return_address(0));
+}
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
-void *__kmalloc_node(size_t size, gfp_t flags, int node)
+static __always_inline void *
+__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
 {
 	struct kmem_cache *cachep;
 
@@ -3468,8 +3631,29 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 		return NULL;
 	return kmem_cache_alloc_node(cachep, flags, node);
 }
+
+#ifdef CONFIG_DEBUG_SLAB
+void *__kmalloc_node(size_t size, gfp_t flags, int node)
+{
+	return __do_kmalloc_node(size, flags, node,
+			__builtin_return_address(0));
+}
 EXPORT_SYMBOL(__kmalloc_node);
-#endif
+
+void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
+		int node, void *caller)
+{
+	return __do_kmalloc_node(size, flags, node, caller);
+}
+EXPORT_SYMBOL(__kmalloc_node_track_caller);
+#else
+void *__kmalloc_node(size_t size, gfp_t flags, int node)
+{
+	return __do_kmalloc_node(size, flags, node, NULL);
+}
+EXPORT_SYMBOL(__kmalloc_node);
+#endif /* CONFIG_DEBUG_SLAB */
+#endif /* CONFIG_NUMA */
 
 /**
  * __do_kmalloc - allocate memory
@@ -3580,13 +3764,15 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
 	int node;
 	struct kmem_list3 *l3;
 	struct array_cache *new_shared;
-	struct array_cache **new_alien;
+	struct array_cache **new_alien = NULL;
 
 	for_each_online_node(node) {
 
-		new_alien = alloc_alien_cache(node, cachep->limit);
-		if (!new_alien)
-			goto fail;
+		if (use_alien_caches) {
+			new_alien = alloc_alien_cache(node, cachep->limit);
+			if (!new_alien)
+				goto fail;
+		}
 
 		new_shared = alloc_arraycache(node,
 				cachep->shared*cachep->batchcount,
@@ -3812,7 +3998,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
  * If we cannot acquire the cache chain mutex then just give up - we'll try
  * again on the next iteration.
  */
-static void cache_reap(void *unused)
+static void cache_reap(struct work_struct *unused)
 {
 	struct kmem_cache *searchp;
 	struct kmem_list3 *l3;
@@ -3821,7 +4007,7 @@ static void cache_reap(void *unused)
 	if (!mutex_trylock(&cache_chain_mutex)) {
 		/* Give up. Setup the next iteration. */
 		schedule_delayed_work(&__get_cpu_var(reap_work),
-				      REAPTIMEOUT_CPUC);
+				      round_jiffies_relative(REAPTIMEOUT_CPUC));
 		return;
 	}
 
@@ -3867,7 +4053,8 @@ next:
 	next_reap_node();
 	refresh_cpu_vm_stats(smp_processor_id());
 	/* Set up the next iteration */
-	schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
+	schedule_delayed_work(&__get_cpu_var(reap_work),
+		round_jiffies_relative(REAPTIMEOUT_CPUC));
 }
 
 #ifdef CONFIG_PROC_FS
@@ -4035,7 +4222,7 @@ static int s_show(struct seq_file *m, void *p)
  * + further values on SMP and with statistics enabled
  */
 
-struct seq_operations slabinfo_op = {
+const struct seq_operations slabinfo_op = {
 	.start = s_start,
 	.next = s_next,
 	.stop = s_stop,
@@ -4233,7 +4420,7 @@ static int leaks_show(struct seq_file *m, void *p)
 	return 0;
 }
 
-struct seq_operations slabstats_op = {
+const struct seq_operations slabstats_op = {
 	.start = leaks_start,
 	.next = s_next,
 	.stop = s_stop,