Diffstat (limited to 'mm/slab.c')
-rw-r--r--   mm/slab.c | 298
1 files changed, 201 insertions, 97 deletions
@@ -103,12 +103,12 @@
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <linux/string.h>
+#include <linux/uaccess.h>
 #include <linux/nodemask.h>
 #include <linux/mempolicy.h>
 #include <linux/mutex.h>
 #include <linux/rtmutex.h>
 
-#include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
@@ -313,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache,
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
			int node);
 static int enable_cpucache(struct kmem_cache *cachep);
-static void cache_reap(void *unused);
+static void cache_reap(struct work_struct *unused);
 
 /*
  * This function must be completely optimized away if a constant is passed to
@@ -730,7 +730,10 @@ static inline void init_lock_keys(void)
 }
 #endif
 
-/* Guard access to the cache-chain. */
+/*
+ * 1. Guard access to the cache-chain.
+ * 2. Protect sanity of cpu_online_map against cpu hotplug events
+ */
 static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
 
@@ -753,7 +756,7 @@ int slab_is_available(void)
	return g_cpucache_up == FULL;
 }
 
-static DEFINE_PER_CPU(struct work_struct, reap_work);
+static DEFINE_PER_CPU(struct delayed_work, reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
@@ -866,6 +869,22 @@ static void __slab_error(const char *function, struct kmem_cache *cachep,
	dump_stack();
 }
 
+/*
+ * By default on NUMA we use alien caches to stage the freeing of
+ * objects allocated from other nodes. This causes massive memory
+ * inefficiencies when using fake NUMA setup to split memory into a
+ * large number of small nodes, so it can be disabled on the command
+ * line
+ */
+
+static int use_alien_caches __read_mostly = 1;
+static int __init noaliencache_setup(char *s)
+{
+	use_alien_caches = 0;
+	return 1;
+}
+__setup("noaliencache", noaliencache_setup);
+
 #ifdef CONFIG_NUMA
 /*
  * Special reaping functions for NUMA systems called from cache_reap().
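The new noaliencache parameter above is wired up through the kernel's standard __setup() mechanism: the handler runs during early boot when the token appears on the kernel command line, and returning 1 marks the option as consumed. For reference only, a minimal sketch of the same pattern with a hypothetical "nomyfeature" flag (names not taken from slab.c) looks like this:

/* Sketch of the __setup() boot-parameter pattern used by "noaliencache".
 * "nomyfeature" and myfeature_enabled are hypothetical names. */
#include <linux/init.h>

static int myfeature_enabled __read_mostly = 1;

static int __init nomyfeature_setup(char *s)
{
	myfeature_enabled = 0;
	return 1;	/* option handled, do not pass it on to init */
}
__setup("nomyfeature", nomyfeature_setup);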
@@ -916,16 +935,16 @@ static void next_reap_node(void)
  */
 static void __devinit start_cpu_timer(int cpu)
 {
-	struct work_struct *reap_work = &per_cpu(reap_work, cpu);
+	struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
 
	/*
	 * When this gets called from do_initcalls via cpucache_init(),
	 * init_workqueues() has already run, so keventd will be setup
	 * at that time.
	 */
-	if (keventd_up() && reap_work->func == NULL) {
+	if (keventd_up() && reap_work->work.func == NULL) {
		init_reap_node(cpu);
-		INIT_WORK(reap_work, cache_reap, NULL);
+		INIT_DELAYED_WORK(reap_work, cache_reap);
		schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
	}
 }
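This hunk follows the 2.6.20 workqueue API change: periodic work now lives in a struct delayed_work wrapping a struct work_struct, INIT_DELAYED_WORK() takes only the handler, and the handler receives a work_struct pointer instead of a void *. A minimal sketch of that pattern outside slab.c (my_work and my_reap are hypothetical names, not part of this patch):

/* Sketch of the delayed_work API that cache_reap() is converted to above. */
#include <linux/module.h>
#include <linux/workqueue.h>

static struct delayed_work my_work;

static void my_reap(struct work_struct *w)
{
	/* The handler gets the embedded work_struct; recover the wrapper. */
	struct delayed_work *dw = container_of(w, struct delayed_work, work);

	/* ... periodic work here ..., then re-arm one second later. */
	schedule_delayed_work(dw, HZ);
}

static int __init my_init(void)
{
	INIT_DELAYED_WORK(&my_work, my_reap);
	schedule_delayed_work(&my_work, HZ);
	return 0;
}
module_init(my_init);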
@@ -996,7 +1015,7 @@ static inline void *alternate_node_alloc(struct kmem_cache *cachep,
	return NULL;
 }
 
-static inline void *__cache_alloc_node(struct kmem_cache *cachep,
+static inline void *____cache_alloc_node(struct kmem_cache *cachep,
		 gfp_t flags, int nodeid)
 {
	return NULL;
@@ -1004,7 +1023,7 @@ static inline void *__cache_alloc_node(struct kmem_cache *cachep,
 
 #else /* CONFIG_NUMA */
 
-static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
+static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
 static struct array_cache **alloc_alien_cache(int node, int limit)
@@ -1114,7 +1133,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
	 * Make sure we are not freeing a object from another node to the array
	 * cache on this cpu.
	 */
-	if (likely(slabp->nodeid == node))
+	if (likely(slabp->nodeid == node) || unlikely(!use_alien_caches))
		return 0;
 
	l3 = cachep->nodelists[node];
@@ -1192,7 +1211,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
		list_for_each_entry(cachep, &cache_chain, next) {
			struct array_cache *nc;
			struct array_cache *shared;
-			struct array_cache **alien;
+			struct array_cache **alien = NULL;
 
			nc = alloc_arraycache(node, cachep->limit,
						cachep->batchcount);
@@ -1204,9 +1223,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
			if (!shared)
				goto bad;
 
-			alien = alloc_alien_cache(node, cachep->limit);
-			if (!alien)
-				goto bad;
+			if (use_alien_caches) {
+				alien = alloc_alien_cache(node, cachep->limit);
+				if (!alien)
+					goto bad;
+			}
			cachep->array[cpu] = nc;
			l3 = cachep->nodelists[node];
			BUG_ON(!l3);
@@ -1230,12 +1251,18 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
			kfree(shared);
			free_alien_cache(alien);
		}
-		mutex_unlock(&cache_chain_mutex);
		break;
	case CPU_ONLINE:
+		mutex_unlock(&cache_chain_mutex);
		start_cpu_timer(cpu);
		break;
 #ifdef CONFIG_HOTPLUG_CPU
+	case CPU_DOWN_PREPARE:
+		mutex_lock(&cache_chain_mutex);
+		break;
+	case CPU_DOWN_FAILED:
+		mutex_unlock(&cache_chain_mutex);
+		break;
	case CPU_DEAD:
		/*
		 * Even if all the cpus of a node are down, we don't free the
@@ -1246,8 +1273,8 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
		 * gets destroyed at kmem_cache_destroy().
		 */
		/* fall thru */
+#endif
	case CPU_UP_CANCELED:
-		mutex_lock(&cache_chain_mutex);
		list_for_each_entry(cachep, &cache_chain, next) {
			struct array_cache *nc;
			struct array_cache *shared;
@@ -1308,11 +1335,9 @@ free_array_cache:
	}
	mutex_unlock(&cache_chain_mutex);
	break;
-#endif
	}
	return NOTIFY_OK;
 bad:
-	mutex_unlock(&cache_chain_mutex);
	return NOTIFY_BAD;
 }
 
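Taken together, the notifier changes above make cache_chain_mutex do double duty: it is taken at CPU_UP_PREPARE and CPU_DOWN_PREPARE and released at CPU_ONLINE, CPU_DOWN_FAILED, or after the CPU_UP_CANCELED/CPU_DEAD cleanup, so code holding the mutex also sees a stable cpu_online_map. A stripped-down sketch of that lock-across-notifications pattern (my_lock and my_cpu_callback are hypothetical; the real slab callback does per-cache work before dropping the lock):

/* Sketch: hold a mutex across the prepare/commit halves of a hotplug event. */
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/notifier.h>

static DEFINE_MUTEX(my_lock);

static int my_cpu_callback(struct notifier_block *nb, unsigned long action,
			   void *hcpu)
{
	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_DOWN_PREPARE:
		mutex_lock(&my_lock);		/* freeze per-cpu state */
		break;
	case CPU_ONLINE:
	case CPU_UP_CANCELED:
	case CPU_DOWN_FAILED:
	case CPU_DEAD:
		mutex_unlock(&my_lock);		/* transition finished */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block my_cpu_nb = {
	.notifier_call = my_cpu_callback,
};

static int __init my_hotplug_init(void)
{
	register_cpu_notifier(&my_cpu_nb);
	return 0;
}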
@@ -1580,12 +1605,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
		flags |= __GFP_COMP;
 #endif
 
-	/*
-	 * Under NUMA we want memory on the indicated node. We will handle
-	 * the needed fallback ourselves since we want to serve from our
-	 * per node object lists first for other nodes.
-	 */
-	flags |= cachep->gfpflags | GFP_THISNODE;
+	flags |= cachep->gfpflags;
 
	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
	if (!page)
@@ -2098,15 +2118,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
	}
 
	/*
-	 * Prevent CPUs from coming and going.
-	 * lock_cpu_hotplug() nests outside cache_chain_mutex
+	 * We use cache_chain_mutex to ensure a consistent view of
+	 * cpu_online_map as well. Please see cpuup_callback
	 */
-	lock_cpu_hotplug();
-
	mutex_lock(&cache_chain_mutex);
 
	list_for_each_entry(pc, &cache_chain, next) {
-		mm_segment_t old_fs = get_fs();
		char tmp;
		int res;
 
@@ -2115,9 +2132,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
		 * destroy its slab cache and no-one else reuses the vmalloc
		 * area of the module. Print a warning.
		 */
-		set_fs(KERNEL_DS);
-		res = __get_user(tmp, pc->name);
-		set_fs(old_fs);
+		res = probe_kernel_address(pc->name, tmp);
		if (res) {
			printk("SLAB: cache with size %d has lost its name\n",
			       pc->buffer_size);
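probe_kernel_address() comes from the <linux/uaccess.h> header added at the top of this diff. It safely reads a single value through a possibly-stale kernel pointer and yields 0 on success or -EFAULT if the page is gone, which is exactly what the removed set_fs(KERNEL_DS)/__get_user()/set_fs() sequence was hand-rolling. A small usage sketch (name_is_readable() is a hypothetical helper, not from the patch):

/* Sketch of probe_kernel_address(); returns true if *name can be read. */
#include <linux/uaccess.h>

static int name_is_readable(const char *name)
{
	char tmp;

	return probe_kernel_address(name, tmp) == 0;
}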
@@ -2197,25 +2212,24 @@ kmem_cache_create (const char *name, size_t size, size_t align,
	if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
		ralign = BYTES_PER_WORD;
 
-	/* 2) arch mandated alignment: disables debug if necessary */
+	/* 2) arch mandated alignment */
	if (ralign < ARCH_SLAB_MINALIGN) {
		ralign = ARCH_SLAB_MINALIGN;
-		if (ralign > BYTES_PER_WORD)
-			flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
	}
-	/* 3) caller mandated alignment: disables debug if necessary */
+	/* 3) caller mandated alignment */
	if (ralign < align) {
		ralign = align;
-		if (ralign > BYTES_PER_WORD)
-			flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
	}
+	/* disable debug if necessary */
+	if (ralign > BYTES_PER_WORD)
+		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
	/*
	 * 4) Store it.
	 */
	align = ralign;
 
	/* Get cache's description obj. */
-	cachep = kmem_cache_zalloc(&cache_cache, SLAB_KERNEL);
+	cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
	if (!cachep)
		goto oops;
 
@@ -2326,7 +2340,6 @@ oops:
		panic("kmem_cache_create(): failed to create slab `%s'\n",
		      name);
	mutex_unlock(&cache_chain_mutex);
-	unlock_cpu_hotplug();
	return cachep;
 }
 EXPORT_SYMBOL(kmem_cache_create);
@@ -2444,6 +2457,7 @@ out:
	return nr_freed;
 }
 
+/* Called with cache_chain_mutex held to protect against cpu hotplug */
 static int __cache_shrink(struct kmem_cache *cachep)
 {
	int ret = 0, i = 0;
@@ -2474,9 +2488,13 @@ static int __cache_shrink(struct kmem_cache *cachep)
  */
 int kmem_cache_shrink(struct kmem_cache *cachep)
 {
+	int ret;
	BUG_ON(!cachep || in_interrupt());
 
-	return __cache_shrink(cachep);
+	mutex_lock(&cache_chain_mutex);
+	ret = __cache_shrink(cachep);
+	mutex_unlock(&cache_chain_mutex);
+	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_shrink);
 
@@ -2500,23 +2518,16 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
 {
	BUG_ON(!cachep || in_interrupt());
 
-	/* Don't let CPUs to come and go */
-	lock_cpu_hotplug();
-
	/* Find the cache in the chain of caches. */
	mutex_lock(&cache_chain_mutex);
	/*
	 * the chain is never empty, cache_cache is never destroyed
	 */
	list_del(&cachep->next);
-	mutex_unlock(&cache_chain_mutex);
-
	if (__cache_shrink(cachep)) {
		slab_error(cachep, "Can't free all objects");
-		mutex_lock(&cache_chain_mutex);
		list_add(&cachep->next, &cache_chain);
		mutex_unlock(&cache_chain_mutex);
-		unlock_cpu_hotplug();
		return;
	}
 
@@ -2524,7 +2535,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
		synchronize_rcu();
 
	__kmem_cache_destroy(cachep);
-	unlock_cpu_hotplug();
+	mutex_unlock(&cache_chain_mutex);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
@@ -2548,7 +2559,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
	if (OFF_SLAB(cachep)) {
		/* Slab management obj is off-slab. */
		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
-					      local_flags, nodeid);
+					      local_flags & ~GFP_THISNODE, nodeid);
		if (!slabp)
			return NULL;
	} else {
@@ -2618,7 +2629,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
 
 static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
 {
-	if (flags & SLAB_DMA)
+	if (flags & GFP_DMA)
		BUG_ON(!(cachep->gfpflags & GFP_DMA));
	else
		BUG_ON(cachep->gfpflags & GFP_DMA);
@@ -2689,10 +2700,10 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
 * Grow (by 1) the number of slabs within a cache. This is called by
 * kmem_cache_alloc() when there are no active objs left in a cache.
 */
-static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static int cache_grow(struct kmem_cache *cachep,
+		gfp_t flags, int nodeid, void *objp)
 {
	struct slab *slabp;
-	void *objp;
	size_t offset;
	gfp_t local_flags;
	unsigned long ctor_flags;
@@ -2702,12 +2713,12 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
	 * Be lazy and only check for valid flags here, keeping it out of the
	 * critical path in kmem_cache_alloc().
	 */
-	BUG_ON(flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW));
-	if (flags & SLAB_NO_GROW)
+	BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK | __GFP_NO_GROW));
+	if (flags & __GFP_NO_GROW)
		return 0;
 
	ctor_flags = SLAB_CTOR_CONSTRUCTOR;
-	local_flags = (flags & SLAB_LEVEL_MASK);
+	local_flags = (flags & GFP_LEVEL_MASK);
	if (!(local_flags & __GFP_WAIT))
		/*
		 * Not allowed to sleep. Need to tell a constructor about
@@ -2744,12 +2755,14 @@ static int cache_grow(struct kmem_cache *cachep,
	 * Get mem for the objs. Attempt to allocate a physical page from
	 * 'nodeid'.
	 */
-	objp = kmem_getpages(cachep, flags, nodeid);
+	if (!objp)
+		objp = kmem_getpages(cachep, flags, nodeid);
	if (!objp)
		goto failed;
 
	/* Get slab management. */
-	slabp = alloc_slabmgmt(cachep, objp, offset, local_flags, nodeid);
+	slabp = alloc_slabmgmt(cachep, objp, offset,
+			local_flags & ~GFP_THISNODE, nodeid);
	if (!slabp)
		goto opps1;
 
@@ -2987,7 +3000,7 @@ alloc_done:
 
	if (unlikely(!ac->avail)) {
		int x;
-		x = cache_grow(cachep, flags, node);
+		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
 
		/* cache_grow can reenable interrupts, then ac could change. */
		ac = cpu_cache_get(cachep);
@@ -3063,6 +3076,12 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 
		cachep->ctor(objp, cachep, ctor_flags);
	}
+#if ARCH_SLAB_MINALIGN
+	if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
+		printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
+		       objp, ARCH_SLAB_MINALIGN);
+	}
+#endif
	return objp;
 }
 #else
@@ -3105,10 +3124,10 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
	objp = ____cache_alloc(cachep, flags);
	/*
	 * We may just have run out of memory on the local node.
-	 * __cache_alloc_node() knows how to locate memory on other nodes
+	 * ____cache_alloc_node() knows how to locate memory on other nodes
	 */
	if (NUMA_BUILD && !objp)
-		objp = __cache_alloc_node(cachep, flags, numa_node_id());
+		objp = ____cache_alloc_node(cachep, flags, numa_node_id());
	local_irq_restore(save_flags);
	objp = cache_alloc_debugcheck_after(cachep, flags, objp,
					    caller);
@@ -3135,15 +3154,17 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
	else if (current->mempolicy)
		nid_alloc = slab_node(current->mempolicy);
	if (nid_alloc != nid_here)
-		return __cache_alloc_node(cachep, flags, nid_alloc);
+		return ____cache_alloc_node(cachep, flags, nid_alloc);
	return NULL;
 }
 
 /*
 * Fallback function if there was no memory available and no objects on a
- * certain node and we are allowed to fall back. We mimick the behavior of
- * the page allocator. We fall back according to a zonelist determined by
- * the policy layer while obeying cpuset constraints.
+ * certain node and fall back is permitted. First we scan all the
+ * available nodelists for available objects. If that fails then we
+ * perform an allocation without specifying a node. This allows the page
+ * allocator to do its reclaim / fallback magic. We then insert the
+ * slab into the proper nodelist and then allocate from it.
 */
 void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 {
@@ -3151,15 +3172,51 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
			->node_zonelists[gfp_zone(flags)];
	struct zone **z;
	void *obj = NULL;
+	int nid;
 
+retry:
+	/*
+	 * Look through allowed nodes for objects available
+	 * from existing per node queues.
+	 */
	for (z = zonelist->zones; *z && !obj; z++) {
-		int nid = zone_to_nid(*z);
+		nid = zone_to_nid(*z);
+
+		if (cpuset_zone_allowed(*z, flags) &&
+			cache->nodelists[nid] &&
+			cache->nodelists[nid]->free_objects)
+				obj = ____cache_alloc_node(cache,
+					flags | GFP_THISNODE, nid);
+	}
 
-		if (zone_idx(*z) <= ZONE_NORMAL &&
-				cpuset_zone_allowed(*z, flags) &&
-				cache->nodelists[nid])
-			obj = __cache_alloc_node(cache,
-					flags | __GFP_THISNODE, nid);
+	if (!obj) {
+		/*
+		 * This allocation will be performed within the constraints
+		 * of the current cpuset / memory policy requirements.
+		 * We may trigger various forms of reclaim on the allowed
+		 * set and go into memory reserves if necessary.
+		 */
+		obj = kmem_getpages(cache, flags, -1);
+		if (obj) {
+			/*
+			 * Insert into the appropriate per node queues
+			 */
+			nid = page_to_nid(virt_to_page(obj));
+			if (cache_grow(cache, flags, nid, obj)) {
+				obj = ____cache_alloc_node(cache,
+					flags | GFP_THISNODE, nid);
+				if (!obj)
+					/*
+					 * Another processor may allocate the
+					 * objects in the slab since we are
+					 * not holding any locks.
+					 */
+					goto retry;
+			} else {
+				kmem_freepages(cache, obj);
+				obj = NULL;
+			}
+		}
	}
	return obj;
 }
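The fallback path above deliberately separates two policies: the per-node scan passes GFP_THISNODE so ____cache_alloc_node() never spills onto another node behind the caller's back, while the final kmem_getpages(cache, flags, -1) drops the node restriction and lets the page allocator reclaim or fall back as usual. As a point of reference (alloc_on_node_only() is a hypothetical helper, not from this patch), GFP_THISNODE at the page-allocator level behaves like this:

/* Sketch: GFP_THISNODE confines alloc_pages_node() to one node; it returns
 * NULL rather than falling back to other nodes' zones. */
#include <linux/gfp.h>

static struct page *alloc_on_node_only(int nid, unsigned int order)
{
	return alloc_pages_node(nid, GFP_KERNEL | GFP_THISNODE, order);
}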
@@ -3167,7 +3224,7 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 /*
 * A interface to enable slab creation on nodeid
 */
-static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
+static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
				int nodeid)
 {
	struct list_head *entry;
@@ -3216,7 +3273,7 @@ retry:
 
 must_grow:
	spin_unlock(&l3->list_lock);
-	x = cache_grow(cachep, flags, nodeid);
+	x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
	if (x)
		goto retry;
 
@@ -3434,35 +3491,59 @@ out:
 * @flags: See kmalloc().
 * @nodeid: node number of the target node.
 *
- * Identical to kmem_cache_alloc, except that this function is slow
- * and can sleep. And it will allocate memory on the given node, which
- * can improve the performance for cpu bound structures.
- * New and improved: it will now make sure that the object gets
- * put on the correct node list so that there is no false sharing.
+ * Identical to kmem_cache_alloc but it will allocate memory on the given
+ * node, which can improve the performance for cpu bound structures.
+ *
+ * Fallback to other node is possible if __GFP_THISNODE is not set.
 */
-void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static __always_inline void *
+__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
+		int nodeid, void *caller)
 {
	unsigned long save_flags;
-	void *ptr;
+	void *ptr = NULL;
 
	cache_alloc_debugcheck_before(cachep, flags);
	local_irq_save(save_flags);
 
-	if (nodeid == -1 || nodeid == numa_node_id() ||
-			!cachep->nodelists[nodeid])
-		ptr = ____cache_alloc(cachep, flags);
-	else
-		ptr = __cache_alloc_node(cachep, flags, nodeid);
-	local_irq_restore(save_flags);
+	if (unlikely(nodeid == -1))
+		nodeid = numa_node_id();
+
+	if (likely(cachep->nodelists[nodeid])) {
+		if (nodeid == numa_node_id()) {
+			/*
+			 * Use the locally cached objects if possible.
+			 * However ____cache_alloc does not allow fallback
+			 * to other nodes. It may fail while we still have
+			 * objects on other nodes available.
+			 */
+			ptr = ____cache_alloc(cachep, flags);
+		}
+		if (!ptr) {
+			/* ___cache_alloc_node can fall back to other nodes */
+			ptr = ____cache_alloc_node(cachep, flags, nodeid);
+		}
+	} else {
+		/* Node not bootstrapped yet */
+		if (!(flags & __GFP_THISNODE))
+			ptr = fallback_alloc(cachep, flags);
+	}
 
-	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr,
-					   __builtin_return_address(0));
+	local_irq_restore(save_flags);
+	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
 
	return ptr;
 }
+
+void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+{
+	return __cache_alloc_node(cachep, flags, nodeid,
+			__builtin_return_address(0));
+}
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
-void *__kmalloc_node(size_t size, gfp_t flags, int node)
+static __always_inline void *
+__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
 {
	struct kmem_cache *cachep;
 
@@ -3471,8 +3552,29 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
		return NULL;
	return kmem_cache_alloc_node(cachep, flags, node);
 }
+
+#ifdef CONFIG_DEBUG_SLAB
+void *__kmalloc_node(size_t size, gfp_t flags, int node)
+{
+	return __do_kmalloc_node(size, flags, node,
+			__builtin_return_address(0));
+}
 EXPORT_SYMBOL(__kmalloc_node);
-#endif
+
+void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
+		int node, void *caller)
+{
+	return __do_kmalloc_node(size, flags, node, caller);
+}
+EXPORT_SYMBOL(__kmalloc_node_track_caller);
+#else
+void *__kmalloc_node(size_t size, gfp_t flags, int node)
+{
+	return __do_kmalloc_node(size, flags, node, NULL);
+}
+EXPORT_SYMBOL(__kmalloc_node);
+#endif /* CONFIG_DEBUG_SLAB */
+#endif /* CONFIG_NUMA */
 
 /**
 * __do_kmalloc - allocate memory
@@ -3583,13 +3685,15 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
	int node;
	struct kmem_list3 *l3;
	struct array_cache *new_shared;
-	struct array_cache **new_alien;
+	struct array_cache **new_alien = NULL;
 
	for_each_online_node(node) {
 
-		new_alien = alloc_alien_cache(node, cachep->limit);
-		if (!new_alien)
-			goto fail;
+		if (use_alien_caches) {
+			new_alien = alloc_alien_cache(node, cachep->limit);
+			if (!new_alien)
+				goto fail;
+		}
 
		new_shared = alloc_arraycache(node,
				cachep->shared*cachep->batchcount,
@@ -3815,7 +3919,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
 * If we cannot acquire the cache chain mutex then just give up - we'll try
 * again on the next iteration.
 */
-static void cache_reap(void *unused)
+static void cache_reap(struct work_struct *unused)
 {
	struct kmem_cache *searchp;
	struct kmem_list3 *l3;
@@ -4038,7 +4142,7 @@ static int s_show(struct seq_file *m, void *p)
 * + further values on SMP and with statistics enabled
 */
 
-struct seq_operations slabinfo_op = {
+const struct seq_operations slabinfo_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
@@ -4236,7 +4340,7 @@ static int leaks_show(struct seq_file *m, void *p)
	return 0;
 }
 
-struct seq_operations slabstats_op = {
+const struct seq_operations slabstats_op = {
	.start = leaks_start,
	.next = s_next,
	.stop = s_stop,