aboutsummaryrefslogtreecommitdiffstats
path: root/lib/genalloc.c
diff options
context:
space:
mode:
authorBenjamin Gaignard <benjamin.gaignard@linaro.org>2012-10-04 20:13:20 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-05 14:04:57 -0400
commitca279cf1065fb689abea1dc7d8c11787729bb185 (patch)
treefdde907d1c3198f81c9085f858ac64c7a3cc50d8 /lib/genalloc.c
parente96875677fb2b7cb739c5d7769824dff7260d31d (diff)
genalloc: make it possible to use a custom allocation algorithm
Premit use of another algorithm than the default first-fit one. For example a custom algorithm could be used to manage alignment requirements. As I can't predict all the possible requirements/needs for all allocation uses cases, I add a "free" field 'void *data' to pass any needed information to the allocation function. For example 'data' could be used to handle a structure where you store the alignment, the expected memory bank, the requester device, or any information that could influence the allocation algorithm. An usage example may look like this: struct my_pool_constraints { int align; int bank; ... }; unsigned long my_custom_algo(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data) { struct my_pool_constraints *constraints = data; ... deal with allocation contraints ... return the index in bitmap where perform the allocation } void create_my_pool() { struct my_pool_constraints c; struct gen_pool *pool = gen_pool_create(...); gen_pool_add(pool, ...); gen_pool_set_algo(pool, my_custom_algo, &c); } Add of best-fit algorithm function: most of the time best-fit is slower then first-fit but memory fragmentation is lower. The random buffer allocation/free tests don't show any arithmetic relation between the allocation time and fragmentation but the best-fit algorithm is sometime able to perform the allocation when the first-fit can't. This new algorithm help to remove static allocations on ESRAM, a small but fast on-chip RAM of few KB, used for high-performance uses cases like DMA linked lists, graphic accelerators, encoders/decoders. On the Ux500 (in the ARM tree) we have define 5 ESRAM banks of 128 KB each and use of static allocations becomes unmaintainable: cd arch/arm/mach-ux500 && grep -r ESRAM . ./include/mach/db8500-regs.h:/* Base address and bank offsets for ESRAM */ ./include/mach/db8500-regs.h:#define U8500_ESRAM_BASE 0x40000000 ./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK_SIZE 0x00020000 ./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK0 U8500_ESRAM_BASE ./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK1 (U8500_ESRAM_BASE + U8500_ESRAM_BANK_SIZE) ./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK2 (U8500_ESRAM_BANK1 + U8500_ESRAM_BANK_SIZE) ./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK3 (U8500_ESRAM_BANK2 + U8500_ESRAM_BANK_SIZE) ./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK4 (U8500_ESRAM_BANK3 + U8500_ESRAM_BANK_SIZE) ./include/mach/db8500-regs.h:#define U8500_ESRAM_DMA_LCPA_OFFSET 0x10000 ./include/mach/db8500-regs.h:#define U8500_DMA_LCPA_BASE (U8500_ESRAM_BANK0 + U8500_ESRAM_DMA_LCPA_OFFSET) ./include/mach/db8500-regs.h:#define U8500_DMA_LCLA_BASE U8500_ESRAM_BANK4 I want to use genalloc to do dynamic allocations but I need to be able to fine tune the allocation algorithm. I my case best-fit algorithm give better results than first-fit, but it will not be true for every use case. Signed-off-by: Benjamin Gaignard <benjamin.gaignard@stericsson.com> Cc: Huang Ying <ying.huang@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib/genalloc.c')
-rw-r--r--lib/genalloc.c88
1 files changed, 84 insertions, 4 deletions
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 6bc04aab6ec7..ca208a92628c 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -152,6 +152,8 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
152 spin_lock_init(&pool->lock); 152 spin_lock_init(&pool->lock);
153 INIT_LIST_HEAD(&pool->chunks); 153 INIT_LIST_HEAD(&pool->chunks);
154 pool->min_alloc_order = min_alloc_order; 154 pool->min_alloc_order = min_alloc_order;
155 pool->algo = gen_pool_first_fit;
156 pool->data = NULL;
155 } 157 }
156 return pool; 158 return pool;
157} 159}
@@ -255,8 +257,9 @@ EXPORT_SYMBOL(gen_pool_destroy);
255 * @size: number of bytes to allocate from the pool 257 * @size: number of bytes to allocate from the pool
256 * 258 *
257 * Allocate the requested number of bytes from the specified pool. 259 * Allocate the requested number of bytes from the specified pool.
258 * Uses a first-fit algorithm. Can not be used in NMI handler on 260 * Uses the pool allocation function (with first-fit algorithm by default).
259 * architectures without NMI-safe cmpxchg implementation. 261 * Can not be used in NMI handler on architectures without
262 * NMI-safe cmpxchg implementation.
260 */ 263 */
261unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) 264unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
262{ 265{
@@ -280,8 +283,8 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
280 283
281 end_bit = (chunk->end_addr - chunk->start_addr) >> order; 284 end_bit = (chunk->end_addr - chunk->start_addr) >> order;
282retry: 285retry:
283 start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 286 start_bit = pool->algo(chunk->bits, end_bit, start_bit, nbits,
284 start_bit, nbits, 0); 287 pool->data);
285 if (start_bit >= end_bit) 288 if (start_bit >= end_bit)
286 continue; 289 continue;
287 remain = bitmap_set_ll(chunk->bits, start_bit, nbits); 290 remain = bitmap_set_ll(chunk->bits, start_bit, nbits);
@@ -400,3 +403,80 @@ size_t gen_pool_size(struct gen_pool *pool)
400 return size; 403 return size;
401} 404}
402EXPORT_SYMBOL_GPL(gen_pool_size); 405EXPORT_SYMBOL_GPL(gen_pool_size);
406
407/**
408 * gen_pool_set_algo - set the allocation algorithm
409 * @pool: pool to change allocation algorithm
410 * @algo: custom algorithm function
411 * @data: additional data used by @algo
412 *
413 * Call @algo for each memory allocation in the pool.
414 * If @algo is NULL use gen_pool_first_fit as default
415 * memory allocation function.
416 */
417void gen_pool_set_algo(struct gen_pool *pool, genpool_algo_t algo, void *data)
418{
419 rcu_read_lock();
420
421 pool->algo = algo;
422 if (!pool->algo)
423 pool->algo = gen_pool_first_fit;
424
425 pool->data = data;
426
427 rcu_read_unlock();
428}
429EXPORT_SYMBOL(gen_pool_set_algo);
430
431/**
432 * gen_pool_first_fit - find the first available region
433 * of memory matching the size requirement (no alignment constraint)
434 * @map: The address to base the search on
435 * @size: The bitmap size in bits
436 * @start: The bitnumber to start searching at
437 * @nr: The number of zeroed bits we're looking for
438 * @data: additional data - unused
439 */
440unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size,
441 unsigned long start, unsigned int nr, void *data)
442{
443 return bitmap_find_next_zero_area(map, size, start, nr, 0);
444}
445EXPORT_SYMBOL(gen_pool_first_fit);
446
447/**
448 * gen_pool_best_fit - find the best fitting region of memory
449 * macthing the size requirement (no alignment constraint)
450 * @map: The address to base the search on
451 * @size: The bitmap size in bits
452 * @start: The bitnumber to start searching at
453 * @nr: The number of zeroed bits we're looking for
454 * @data: additional data - unused
455 *
456 * Iterate over the bitmap to find the smallest free region
457 * which we can allocate the memory.
458 */
459unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size,
460 unsigned long start, unsigned int nr, void *data)
461{
462 unsigned long start_bit = size;
463 unsigned long len = size + 1;
464 unsigned long index;
465
466 index = bitmap_find_next_zero_area(map, size, start, nr, 0);
467
468 while (index < size) {
469 int next_bit = find_next_bit(map, size, index + nr);
470 if ((next_bit - index) < len) {
471 len = next_bit - index;
472 start_bit = index;
473 if (len == nr)
474 return start_bit;
475 }
476 index = bitmap_find_next_zero_area(map, size,
477 next_bit + 1, nr, 0);
478 }
479
480 return start_bit;
481}
482EXPORT_SYMBOL(gen_pool_best_fit);