path: root/mm
author	Manfred Spraul <manfred@dbl.q-ag.de>	2005-05-01 11:58:38 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-05-01 11:58:38 -0400
commit	97e2bde47f886a317909c8a8f9bd2fcd8ce2f0b0 (patch)
tree	bef8ff5bd628ecffd188bdbad4c765bf5e737b8d /mm
parent	dd1d5afca8d3bda7ff9db773fc08e648d2503dc6 (diff)
[PATCH] add kmalloc_node, inline cleanup
The patch makes the following function calls available to allocate memory
on a specific node without changing the basic operation of the slab
allocator:

	kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int flags, int node);
	kmalloc_node(size_t size, unsigned int flags, int node);

in a similar way to the existing node-blind functions:

	kmem_cache_alloc(kmem_cache_t *cachep, unsigned int flags);
	kmalloc(size, flags);

kmem_cache_alloc_node was changed to pass flags and the node information
through the existing layers of the slab allocator (which led to some minor
rearrangements). The functions at the lowest layer (kmem_getpages,
cache_grow) are already node aware. Also, __alloc_percpu can call
kmalloc_node now.

Performance measurements (using the pageset localization patch) yield:

w/o patches:
Tasks    jobs/min  jti  jobs/min/task      real       cpu
    1      484.27  100       484.2736     12.02      1.97   Wed Mar 30 20:50:43 2005
  100    25170.83   91       251.7083     23.12    150.10   Wed Mar 30 20:51:06 2005
  200    34601.66   84       173.0083     33.64    294.14   Wed Mar 30 20:51:40 2005
  300    37154.47   86       123.8482     46.99    436.56   Wed Mar 30 20:52:28 2005
  400    39839.82   80        99.5995     58.43    580.46   Wed Mar 30 20:53:27 2005
  500    40036.32   79        80.0726     72.68    728.60   Wed Mar 30 20:54:40 2005
  600    44074.21   79        73.4570     79.23    872.10   Wed Mar 30 20:55:59 2005
  700    44016.60   78        62.8809     92.56   1015.84   Wed Mar 30 20:57:32 2005
  800    40411.05   80        50.5138    115.22   1161.13   Wed Mar 30 20:59:28 2005
  900    42298.56   79        46.9984    123.83   1303.42   Wed Mar 30 21:01:33 2005
 1000    40955.05   80        40.9551    142.11   1441.92   Wed Mar 30 21:03:55 2005

with pageset localization and slab API patches:
Tasks    jobs/min  jti  jobs/min/task      real       cpu
    1      484.19  100       484.1930     12.02      1.98   Wed Mar 30 21:10:18 2005
  100    27428.25   92       274.2825     21.22    149.79   Wed Mar 30 21:10:40 2005
  200    37228.94   86       186.1447     31.27    293.49   Wed Mar 30 21:11:12 2005
  300    41725.42   85       139.0847     41.84    434.10   Wed Mar 30 21:11:54 2005
  400    43032.22   82       107.5805     54.10    582.06   Wed Mar 30 21:12:48 2005
  500    42211.23   83        84.4225     68.94    722.61   Wed Mar 30 21:13:58 2005
  600    40084.49   82        66.8075     87.12    873.11   Wed Mar 30 21:15:25 2005
  700    44169.30   79        63.0990     92.24   1008.77   Wed Mar 30 21:16:58 2005
  800    43097.94   79        53.8724    108.03   1155.88   Wed Mar 30 21:18:47 2005
  900    41846.75   79        46.4964    125.17   1303.38   Wed Mar 30 21:20:52 2005
 1000    40247.85   79        40.2478    144.60   1442.21   Wed Mar 30 21:23:17 2005

Signed-off-by: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
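For illustration only (this usage sketch is not part of the patch), a subsystem
that keeps per-node bookkeeping could place it on the owning node with the new
call; struct worker_state and alloc_worker_state() are invented names, and
<linux/slab.h> (plus <linux/string.h> for memset) is assumed to be included:

	/* Hypothetical caller of the new node-aware allocator. */
	struct worker_state {
		int node;
		unsigned long jobs_done;
	};

	static struct worker_state *alloc_worker_state(int node)
	{
		struct worker_state *ws;

		/* Allocate from the given node's memory rather than the
		 * local node, so later accesses from CPUs on that node
		 * stay node-local.
		 */
		ws = kmalloc_node(sizeof(*ws), GFP_KERNEL, node);
		if (ws == NULL)
			return NULL;
		memset(ws, 0, sizeof(*ws));
		ws->node = node;
		return ws;
	}

Memory obtained from kmalloc_node() is released with plain kfree(), exactly as
for kmalloc().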
Diffstat (limited to 'mm')
-rw-r--r--	mm/slab.c	45
1 file changed, 31 insertions, 14 deletions
diff --git a/mm/slab.c b/mm/slab.c
index ec660d85ddd7..771cc09f9f1a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -583,7 +583,7 @@ static inline struct array_cache *ac_data(kmem_cache_t *cachep)
 	return cachep->array[smp_processor_id()];
 }
 
-static inline kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags)
+static inline kmem_cache_t *__find_general_cachep(size_t size, int gfpflags)
 {
 	struct cache_sizes *csizep = malloc_sizes;
 
@@ -607,6 +607,12 @@ static inline kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags)
 	return csizep->cs_cachep;
 }
 
+kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags)
+{
+	return __find_general_cachep(size, gfpflags);
+}
+EXPORT_SYMBOL(kmem_find_general_cachep);
+
 /* Cal the num objs, wastage, and bytes left over for a given slab size. */
 static void cache_estimate(unsigned long gfporder, size_t size, size_t align,
 		 int flags, size_t *left_over, unsigned int *num)
@@ -672,14 +678,11 @@ static struct array_cache *alloc_arraycache(int cpu, int entries,
 	int memsize = sizeof(void*)*entries+sizeof(struct array_cache);
 	struct array_cache *nc = NULL;
 
-	if (cpu != -1) {
-		kmem_cache_t *cachep;
-		cachep = kmem_find_general_cachep(memsize, GFP_KERNEL);
-		if (cachep)
-			nc = kmem_cache_alloc_node(cachep, cpu_to_node(cpu));
-	}
-	if (!nc)
+	if (cpu == -1)
 		nc = kmalloc(memsize, GFP_KERNEL);
+	else
+		nc = kmalloc_node(memsize, GFP_KERNEL, cpu_to_node(cpu));
+
 	if (nc) {
 		nc->avail = 0;
 		nc->limit = entries;
@@ -2361,7 +2364,7 @@ out:
  * and can sleep. And it will allocate memory on the given node, which
  * can improve the performance for cpu bound structures.
  */
-void *kmem_cache_alloc_node(kmem_cache_t *cachep, int nodeid)
+void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int nodeid)
 {
 	int loop;
 	void *objp;
@@ -2393,7 +2396,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, int nodeid)
 	spin_unlock_irq(&cachep->spinlock);
 
 	local_irq_disable();
-	if (!cache_grow(cachep, GFP_KERNEL, nodeid)) {
+	if (!cache_grow(cachep, flags, nodeid)) {
 		local_irq_enable();
 		return NULL;
 	}
@@ -2435,6 +2438,16 @@ got_slabp:
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
+void *kmalloc_node(size_t size, int flags, int node)
+{
+	kmem_cache_t *cachep;
+
+	cachep = kmem_find_general_cachep(size, flags);
+	if (unlikely(cachep == NULL))
+		return NULL;
+	return kmem_cache_alloc_node(cachep, flags, node);
+}
+EXPORT_SYMBOL(kmalloc_node);
 #endif
 
 /**
@@ -2462,7 +2475,12 @@ void *__kmalloc(size_t size, unsigned int __nocast flags)
 {
 	kmem_cache_t *cachep;
 
-	cachep = kmem_find_general_cachep(size, flags);
+	/* If you want to save a few bytes .text space: replace
+	 * __ with kmem_.
+	 * Then kmalloc uses the uninlined functions instead of the inline
+	 * functions.
+	 */
+	cachep = __find_general_cachep(size, flags);
 	if (unlikely(cachep == NULL))
 		return NULL;
 	return __cache_alloc(cachep, flags);
@@ -2489,9 +2507,8 @@ void *__alloc_percpu(size_t size, size_t align)
 	for (i = 0; i < NR_CPUS; i++) {
 		if (!cpu_possible(i))
 			continue;
-		pdata->ptrs[i] = kmem_cache_alloc_node(
-					kmem_find_general_cachep(size, GFP_KERNEL),
-					cpu_to_node(i));
+		pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL,
+				cpu_to_node(i));
 
 		if (!pdata->ptrs[i])
 			goto unwind_oom;