From a9785995d5f22aaeb659285f8aeb64d8b56982e0 Mon Sep 17 00:00:00 2001
From: Arto Merilainen
Date: Wed, 19 Mar 2014 09:38:25 +0200
Subject: gpu: nvgpu: Add NVIDIA GPU Driver

This patch moves the NVIDIA GPU driver to a new location.

Bug 1482562

Change-Id: I24293810b9d0f1504fd9be00135e21dad656ccb6
Signed-off-by: Arto Merilainen
Reviewed-on: http://git-master/r/383722
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | 1247 +++++++++++++++++++++++++++++
 1 file changed, 1247 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_allocator.c

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
new file mode 100644
index 00000000..32c003b6
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
@@ -0,0 +1,1247 @@
+/*
+ * gk20a allocator
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "gk20a_allocator.h"
+
+static inline void link_block_list(struct gk20a_allocator *allocator,
+		struct gk20a_alloc_block *block,
+		struct gk20a_alloc_block *prev,
+		struct rb_node *rb_parent);
+static inline void link_block_rb(struct gk20a_allocator *allocator,
+		struct gk20a_alloc_block *block,
+		struct rb_node **rb_link,
+		struct rb_node *rb_parent);
+static void link_block(struct gk20a_allocator *allocator,
+		struct gk20a_alloc_block *block,
+		struct gk20a_alloc_block *prev, struct rb_node **rb_link,
+		struct rb_node *rb_parent);
+static void insert_block(struct gk20a_allocator *allocator,
+		struct gk20a_alloc_block *block);
+
+static void unlink_block(struct gk20a_allocator *allocator,
+		struct gk20a_alloc_block *block,
+		struct gk20a_alloc_block *prev);
+static struct gk20a_alloc_block *unlink_blocks(
+		struct gk20a_allocator *allocator,
+		struct gk20a_alloc_block *block,
+		struct gk20a_alloc_block *prev, u32 end);
+
+static struct gk20a_alloc_block *find_block(
+		struct gk20a_allocator *allocator, u32 addr);
+static struct gk20a_alloc_block *find_block_prev(
+		struct gk20a_allocator *allocator, u32 addr,
+		struct gk20a_alloc_block **pprev);
+static struct gk20a_alloc_block *find_block_prepare(
+		struct gk20a_allocator *allocator, u32 addr,
+		struct gk20a_alloc_block **pprev, struct rb_node ***rb_link,
+		struct rb_node **rb_parent);
+
+static u32 check_free_space(u32 addr, u32 limit, u32 len, u32 align);
+static void update_free_addr_cache(struct gk20a_allocator *allocator,
+		struct gk20a_alloc_block *block,
+		u32 addr, u32 len, bool free);
+static int find_free_area(struct gk20a_allocator *allocator,
+		u32 *addr, u32 len);
+static int find_free_area_nc(struct gk20a_allocator *allocator,
+		u32 *addr, u32 *len);
+
+static void adjust_block(struct gk20a_alloc_block *block,
+		u32 start, u32 end,
+		struct gk20a_alloc_block *insert);
+static struct gk20a_alloc_block *merge_block(
+		struct gk20a_allocator *allocator,
+		struct gk20a_alloc_block *block, u32 addr, u32 end);
+static int split_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + u32 addr, int new_below); + +static int block_alloc_single_locked(struct gk20a_allocator *allocator, + u32 *addr, u32 len); +static int block_alloc_list_locked(struct gk20a_allocator *allocator, + u32 *addr, u32 len, + struct gk20a_alloc_block **pblock); +static int block_free_locked(struct gk20a_allocator *allocator, + u32 addr, u32 len); +static void block_free_list_locked(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *list); + +/* link a block into allocator block list */ +static inline void link_block_list(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + struct gk20a_alloc_block *prev, + struct rb_node *rb_parent) +{ + struct gk20a_alloc_block *next; + + block->prev = prev; + if (prev) { + next = prev->next; + prev->next = block; + } else { + allocator->block_first = block; + if (rb_parent) + next = rb_entry(rb_parent, + struct gk20a_alloc_block, rb); + else + next = NULL; + } + block->next = next; + if (next) + next->prev = block; +} + +/* link a block into allocator rb tree */ +static inline void link_block_rb(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, struct rb_node **rb_link, + struct rb_node *rb_parent) +{ + rb_link_node(&block->rb, rb_parent, rb_link); + rb_insert_color(&block->rb, &allocator->rb_root); +} + +/* add a block to allocator with known location */ +static void link_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + struct gk20a_alloc_block *prev, struct rb_node **rb_link, + struct rb_node *rb_parent) +{ + struct gk20a_alloc_block *next; + + link_block_list(allocator, block, prev, rb_parent); + link_block_rb(allocator, block, rb_link, rb_parent); + allocator->block_count++; + + next = block->next; + allocator_dbg(allocator, "link new block %d:%d between block %d:%d and block %d:%d", + block->start, block->end, + prev ? prev->start : -1, prev ? prev->end : -1, + next ? next->start : -1, next ? next->end : -1); +} + +/* add a block to allocator */ +static void insert_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block) +{ + struct gk20a_alloc_block *prev; + struct rb_node **rb_link, *rb_parent; + + find_block_prepare(allocator, block->start, + &prev, &rb_link, &rb_parent); + link_block(allocator, block, prev, rb_link, rb_parent); +} + +/* remove a block from allocator */ +static void unlink_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + struct gk20a_alloc_block *prev) +{ + struct gk20a_alloc_block *next = block->next; + + allocator_dbg(allocator, "unlink block %d:%d between block %d:%d and block %d:%d", + block->start, block->end, + prev ? prev->start : -1, prev ? prev->end : -1, + next ? next->start : -1, next ? next->end : -1); + + BUG_ON(block->start < allocator->base); + BUG_ON(block->end > allocator->limit); + + if (prev) + prev->next = next; + else + allocator->block_first = next; + + if (next) + next->prev = prev; + rb_erase(&block->rb, &allocator->rb_root); + if (allocator->block_recent == block) + allocator->block_recent = prev; + + allocator->block_count--; +} + +/* remove a list of blocks from allocator. the list can contain both + regular blocks and non-contiguous blocks. 
skip all non-contiguous + blocks, remove regular blocks into a separate list, return list head */ +static struct gk20a_alloc_block * +unlink_blocks(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, + struct gk20a_alloc_block *prev, + u32 end) +{ + struct gk20a_alloc_block **insertion_point; + struct gk20a_alloc_block *last_unfreed_block = prev; + struct gk20a_alloc_block *last_freed_block = NULL; + struct gk20a_alloc_block *first_freed_block = NULL; + + insertion_point = (prev ? &prev->next : &allocator->block_first); + *insertion_point = NULL; + + do { + if (!block->nc_block) { + allocator_dbg(allocator, "unlink block %d:%d", + block->start, block->end); + if (last_freed_block) + last_freed_block->next = block; + block->prev = last_freed_block; + rb_erase(&block->rb, &allocator->rb_root); + last_freed_block = block; + allocator->block_count--; + if (!first_freed_block) + first_freed_block = block; + } else { + allocator_dbg(allocator, "skip nc block %d:%d", + block->start, block->end); + if (!*insertion_point) + *insertion_point = block; + if (last_unfreed_block) + last_unfreed_block->next = block; + block->prev = last_unfreed_block; + last_unfreed_block = block; + } + block = block->next; + } while (block && block->start < end); + + if (!*insertion_point) + *insertion_point = block; + + if (block) + block->prev = last_unfreed_block; + if (last_unfreed_block) + last_unfreed_block->next = block; + if (last_freed_block) + last_freed_block->next = NULL; + + allocator->block_recent = NULL; + + return first_freed_block; +} + +/* Look up the first block which satisfies addr < block->end, + NULL if none */ +static struct gk20a_alloc_block * +find_block(struct gk20a_allocator *allocator, u32 addr) +{ + struct gk20a_alloc_block *block = allocator->block_recent; + + if (!(block && block->end > addr && block->start <= addr)) { + struct rb_node *rb_node; + + rb_node = allocator->rb_root.rb_node; + block = NULL; + + while (rb_node) { + struct gk20a_alloc_block *block_tmp; + + block_tmp = rb_entry(rb_node, + struct gk20a_alloc_block, rb); + + if (block_tmp->end > addr) { + block = block_tmp; + if (block_tmp->start <= addr) + break; + rb_node = rb_node->rb_left; + } else + rb_node = rb_node->rb_right; + if (block) + allocator->block_recent = block; + } + } + return block; +} + +/* Same as find_block, but also return a pointer to the previous block */ +static struct gk20a_alloc_block * +find_block_prev(struct gk20a_allocator *allocator, u32 addr, + struct gk20a_alloc_block **pprev) +{ + struct gk20a_alloc_block *block = NULL, *prev = NULL; + struct rb_node *rb_node; + if (!allocator) + goto out; + + block = allocator->block_first; + + rb_node = allocator->rb_root.rb_node; + + while (rb_node) { + struct gk20a_alloc_block *block_tmp; + block_tmp = rb_entry(rb_node, struct gk20a_alloc_block, rb); + + if (addr < block_tmp->end) + rb_node = rb_node->rb_left; + else { + prev = block_tmp; + if (!prev->next || addr < prev->next->end) + break; + rb_node = rb_node->rb_right; + } + } + +out: + *pprev = prev; + return prev ? 
prev->next : block; +} + +/* Same as find_block, but also return a pointer to the previous block + and return rb_node to prepare for rbtree insertion */ +static struct gk20a_alloc_block * +find_block_prepare(struct gk20a_allocator *allocator, u32 addr, + struct gk20a_alloc_block **pprev, struct rb_node ***rb_link, + struct rb_node **rb_parent) +{ + struct gk20a_alloc_block *block; + struct rb_node **__rb_link, *__rb_parent, *rb_prev; + + __rb_link = &allocator->rb_root.rb_node; + rb_prev = __rb_parent = NULL; + block = NULL; + + while (*__rb_link) { + struct gk20a_alloc_block *block_tmp; + + __rb_parent = *__rb_link; + block_tmp = rb_entry(__rb_parent, + struct gk20a_alloc_block, rb); + + if (block_tmp->end > addr) { + block = block_tmp; + if (block_tmp->start <= addr) + break; + __rb_link = &__rb_parent->rb_left; + } else { + rb_prev = __rb_parent; + __rb_link = &__rb_parent->rb_right; + } + } + + *pprev = NULL; + if (rb_prev) + *pprev = rb_entry(rb_prev, struct gk20a_alloc_block, rb); + *rb_link = __rb_link; + *rb_parent = __rb_parent; + return block; +} + +/* return available space */ +static u32 check_free_space(u32 addr, u32 limit, u32 len, u32 align) +{ + if (addr >= limit) + return 0; + if (addr + len <= limit) + return len; + return (limit - addr) & ~(align - 1); +} + +/* update first_free_addr/last_free_addr based on new free addr + called when free block(s) and allocate block(s) */ +static void update_free_addr_cache(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *next, + u32 addr, u32 len, bool free) +{ + /* update from block free */ + if (free) { + if (allocator->first_free_addr > addr) + allocator->first_free_addr = addr; + } else { /* update from block alloc */ + if (allocator->last_free_addr < addr + len) + allocator->last_free_addr = addr + len; + if (allocator->first_free_addr == addr) { + if (!next || next->start > addr + len) + allocator->first_free_addr = addr + len; + else + allocator->first_free_addr = next->end; + } + } + + if (allocator->first_free_addr > allocator->last_free_addr) + allocator->first_free_addr = allocator->last_free_addr; +} + +/* find a free address range for a fixed len */ +static int find_free_area(struct gk20a_allocator *allocator, + u32 *addr, u32 len) +{ + struct gk20a_alloc_block *block; + u32 start_addr, search_base, search_limit; + + /* fixed addr allocation */ + /* note: constraints for fixed are handled by caller */ + if (*addr) { + block = find_block(allocator, *addr); + if (allocator->limit - len >= *addr && + (!block || *addr + len <= block->start)) { + update_free_addr_cache(allocator, block, + *addr, len, false); + return 0; + } else + return -ENOMEM; + } + + if (!allocator->constraint.enable) { + search_base = allocator->base; + search_limit = allocator->limit; + } else { + start_addr = *addr = allocator->constraint.base; + search_base = allocator->constraint.base; + search_limit = allocator->constraint.limit; + } + + /* cached_hole_size has max free space up to last_free_addr */ + if (len > allocator->cached_hole_size) + start_addr = *addr = allocator->last_free_addr; + else { + start_addr = *addr = allocator->base; + allocator->cached_hole_size = 0; + } + + allocator_dbg(allocator, "start search addr : %d", start_addr); + +full_search: + for (block = find_block(allocator, *addr);; block = block->next) { + if (search_limit - len < *addr) { + /* start a new search in case we missed any hole */ + if (start_addr != search_base) { + start_addr = *addr = search_base; + allocator->cached_hole_size = 0; + 
allocator_dbg(allocator, "start a new search from base"); + goto full_search; + } + return -ENOMEM; + } + if (!block || *addr + len <= block->start) { + update_free_addr_cache(allocator, block, + *addr, len, false); + allocator_dbg(allocator, "free space from %d, len %d", + *addr, len); + allocator_dbg(allocator, "next free addr: %d", + allocator->last_free_addr); + return 0; + } + if (*addr + allocator->cached_hole_size < block->start) + allocator->cached_hole_size = block->start - *addr; + *addr = block->end; + } +} + +/* find a free address range for as long as it meets alignment or meet len */ +static int find_free_area_nc(struct gk20a_allocator *allocator, + u32 *addr, u32 *len) +{ + struct gk20a_alloc_block *block; + u32 start_addr; + u32 avail_len; + + /* fixed addr allocation */ + if (*addr) { + block = find_block(allocator, *addr); + if (allocator->limit - *len >= *addr) { + if (!block) + return 0; + + avail_len = check_free_space(*addr, block->start, + *len, allocator->align); + if (avail_len != 0) { + update_free_addr_cache(allocator, block, + *addr, avail_len, false); + allocator_dbg(allocator, + "free space between %d, %d, len %d", + *addr, block->start, avail_len); + allocator_dbg(allocator, "next free addr: %d", + allocator->last_free_addr); + *len = avail_len; + return 0; + } else + return -ENOMEM; + } else + return -ENOMEM; + } + + start_addr = *addr = allocator->first_free_addr; + + allocator_dbg(allocator, "start search addr : %d", start_addr); + + for (block = find_block(allocator, *addr);; block = block->next) { + if (allocator->limit - *len < *addr) + return -ENOMEM; + if (!block) { + update_free_addr_cache(allocator, block, + *addr, *len, false); + allocator_dbg(allocator, "free space from %d, len %d", + *addr, *len); + allocator_dbg(allocator, "next free addr: %d", + allocator->first_free_addr); + return 0; + } + + avail_len = check_free_space(*addr, block->start, + *len, allocator->align); + if (avail_len != 0) { + update_free_addr_cache(allocator, block, + *addr, avail_len, false); + allocator_dbg(allocator, "free space between %d, %d, len %d", + *addr, block->start, avail_len); + allocator_dbg(allocator, "next free addr: %d", + allocator->first_free_addr); + *len = avail_len; + return 0; + } + if (*addr + allocator->cached_hole_size < block->start) + allocator->cached_hole_size = block->start - *addr; + *addr = block->end; + } +} + +/* expand/shrink a block with new start and new end + split_block function provides insert block for shrink */ +static void adjust_block(struct gk20a_alloc_block *block, + u32 start, u32 end, struct gk20a_alloc_block *insert) +{ + struct gk20a_allocator *allocator = block->allocator; + + allocator_dbg(allocator, "curr block %d:%d, new start %d, new end %d", + block->start, block->end, start, end); + + /* expand */ + if (!insert) { + if (start == block->end) { + struct gk20a_alloc_block *next = block->next; + + if (next && end == next->start) { + /* ....AAAA.... */ + /* PPPP....NNNN */ + /* PPPPPPPPPPPP */ + unlink_block(allocator, next, block); + block->end = next->end; + kmem_cache_free(allocator->block_cache, next); + } else { + /* ....AAAA.... */ + /* PPPP........ */ + /* PPPPPPPP.... */ + block->end = end; + } + } + + if (end == block->start) { + /* ....AAAA.... 
*/ + /* ........NNNN */ + /* PP..NNNNNNNN ....NNNNNNNN */ + block->start = start; + } + } else { /* shrink */ + /* BBBBBBBB -> BBBBIIII OR BBBBBBBB -> IIIIBBBB */ + block->start = start; + block->end = end; + insert_block(allocator, insert); + } +} + +/* given a range [addr, end], merge it with blocks before or after or both + if they can be combined into a contiguous block */ +static struct gk20a_alloc_block * +merge_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *prev, u32 addr, u32 end) +{ + struct gk20a_alloc_block *next; + + if (prev) + next = prev->next; + else + next = allocator->block_first; + + allocator_dbg(allocator, "curr block %d:%d", addr, end); + if (prev) + allocator_dbg(allocator, "prev block %d:%d", + prev->start, prev->end); + if (next) + allocator_dbg(allocator, "next block %d:%d", + next->start, next->end); + + /* don't merge with non-contiguous allocation block */ + if (prev && prev->end == addr && !prev->nc_block) { + adjust_block(prev, addr, end, NULL); + return prev; + } + + /* don't merge with non-contiguous allocation block */ + if (next && end == next->start && !next->nc_block) { + adjust_block(next, addr, end, NULL); + return next; + } + + return NULL; +} + +/* split a block based on addr. addr must be within (start, end). + if new_below == 1, link new block before adjusted current block */ +static int split_block(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block, u32 addr, int new_below) +{ + struct gk20a_alloc_block *new_block; + + allocator_dbg(allocator, "start %d, split %d, end %d, new_below %d", + block->start, addr, block->end, new_below); + + BUG_ON(!(addr > block->start && addr < block->end)); + + new_block = kmem_cache_alloc(allocator->block_cache, GFP_KERNEL); + if (!new_block) + return -ENOMEM; + + *new_block = *block; + + if (new_below) + new_block->end = addr; + else + new_block->start = addr; + + if (new_below) + adjust_block(block, addr, block->end, new_block); + else + adjust_block(block, block->start, addr, new_block); + + return 0; +} + +/* free a list of blocks */ +static void free_blocks(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block) +{ + struct gk20a_alloc_block *curr_block; + while (block) { + curr_block = block; + block = block->next; + kmem_cache_free(allocator->block_cache, curr_block); + } +} + +/* called with rw_sema acquired */ +static int block_alloc_single_locked(struct gk20a_allocator *allocator, + u32 *addr_req, u32 len) +{ + struct gk20a_alloc_block *block, *prev; + struct rb_node **rb_link, *rb_parent; + u32 addr = *addr_req; + int err; + + *addr_req = ~0; + + err = find_free_area(allocator, &addr, len); + if (err) + return err; + + find_block_prepare(allocator, addr, &prev, &rb_link, &rb_parent); + + /* merge requested free space with existing block(s) + if they can be combined into one contiguous block */ + block = merge_block(allocator, prev, addr, addr + len); + if (block) { + *addr_req = addr; + return 0; + } + + /* create a new block if cannot merge */ + block = kmem_cache_zalloc(allocator->block_cache, GFP_KERNEL); + if (!block) + return -ENOMEM; + + block->allocator = allocator; + block->start = addr; + block->end = addr + len; + + link_block(allocator, block, prev, rb_link, rb_parent); + + *addr_req = addr; + + return 0; +} + +static int block_alloc_list_locked(struct gk20a_allocator *allocator, + u32 *addr_req, u32 nc_len, struct gk20a_alloc_block **pblock) +{ + struct gk20a_alloc_block *block; + struct gk20a_alloc_block *nc_head = NULL, *nc_prev = NULL; 
+ u32 addr = *addr_req, len = nc_len; + int err = 0; + + *addr_req = ~0; + + while (nc_len > 0) { + err = find_free_area_nc(allocator, &addr, &len); + if (err) { + allocator_dbg(allocator, "not enough free space"); + goto clean_up; + } + + /* never merge non-contiguous allocation block, + just create a new block */ + block = kmem_cache_zalloc(allocator->block_cache, + GFP_KERNEL); + if (!block) { + err = -ENOMEM; + goto clean_up; + } + + block->allocator = allocator; + block->start = addr; + block->end = addr + len; + + insert_block(allocator, block); + + block->nc_prev = nc_prev; + if (nc_prev) + nc_prev->nc_next = block; + nc_prev = block; + block->nc_block = true; + + if (!nc_head) + nc_head = block; + + if (*addr_req == ~0) + *addr_req = addr; + + addr = 0; + nc_len -= len; + len = nc_len; + allocator_dbg(allocator, "remaining length %d", nc_len); + } + +clean_up: + if (err) { + while (nc_head) { + unlink_block(allocator, nc_head, nc_head->prev); + nc_prev = nc_head; + nc_head = nc_head->nc_next; + kmem_cache_free(allocator->block_cache, nc_prev); + } + *pblock = NULL; + *addr_req = ~0; + } else { + *pblock = nc_head; + } + + return err; +} + +/* called with rw_sema acquired */ +static int block_free_locked(struct gk20a_allocator *allocator, + u32 addr, u32 len) +{ + struct gk20a_alloc_block *block, *prev, *last; + u32 end; + int err; + + /* no block has block->end > addr, already free */ + block = find_block_prev(allocator, addr, &prev); + if (!block) + return 0; + + allocator_dbg(allocator, "first block in free range %d:%d", + block->start, block->end); + + end = addr + len; + /* not in any block, already free */ + if (block->start >= end) + return 0; + + /* don't touch nc_block in range free */ + if (addr > block->start && !block->nc_block) { + int err = split_block(allocator, block, addr, 0); + if (err) + return err; + prev = block; + } + + last = find_block(allocator, end); + if (last && end > last->start && !last->nc_block) { + + allocator_dbg(allocator, "last block in free range %d:%d", + last->start, last->end); + + err = split_block(allocator, last, end, 1); + if (err) + return err; + } + + block = prev ? 
prev->next : allocator->block_first; + + allocator_dbg(allocator, "first block for free %d:%d", + block->start, block->end); + + /* remove blocks between [addr, addr + len) from rb tree + and put them in a list */ + block = unlink_blocks(allocator, block, prev, end); + free_blocks(allocator, block); + + update_free_addr_cache(allocator, NULL, addr, len, true); + + return 0; +} + +/* called with rw_sema acquired */ +static void block_free_list_locked(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *list) +{ + struct gk20a_alloc_block *block; + u32 len; + + update_free_addr_cache(allocator, NULL, + list->start, list->end - list->start, true); + + while (list) { + block = list; + unlink_block(allocator, block, block->prev); + + len = block->end - block->start; + if (allocator->cached_hole_size < len) + allocator->cached_hole_size = len; + + list = block->nc_next; + kmem_cache_free(allocator->block_cache, block); + } +} + +static int +gk20a_allocator_constrain(struct gk20a_allocator *a, + bool enable, u32 base, u32 limit) +{ + if (enable) { + a->constraint.enable = (base >= a->base && + limit <= a->limit); + if (!a->constraint.enable) + return -EINVAL; + a->constraint.base = base; + a->constraint.limit = limit; + a->first_free_addr = a->last_free_addr = base; + + } else { + a->constraint.enable = false; + a->first_free_addr = a->last_free_addr = a->base; + } + + a->cached_hole_size = 0; + + return 0; +} + +/* init allocator struct */ +int gk20a_allocator_init(struct gk20a_allocator *allocator, + const char *name, u32 start, u32 len, u32 align) +{ + memset(allocator, 0, sizeof(struct gk20a_allocator)); + + strncpy(allocator->name, name, 32); + + allocator->block_cache = + kmem_cache_create(allocator->name, + sizeof(struct gk20a_alloc_block), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + if (!allocator->block_cache) + return -ENOMEM; + + allocator->rb_root = RB_ROOT; + + allocator->base = start; + allocator->limit = start + len - 1; + allocator->align = align; + + allocator_dbg(allocator, "%s : base %d, limit %d, align %d", + allocator->name, allocator->base, + allocator->limit, allocator->align); + + allocator->first_free_addr = allocator->last_free_addr = start; + allocator->cached_hole_size = len; + + init_rwsem(&allocator->rw_sema); + + allocator->alloc = gk20a_allocator_block_alloc; + allocator->alloc_nc = gk20a_allocator_block_alloc_nc; + allocator->free = gk20a_allocator_block_free; + allocator->free_nc = gk20a_allocator_block_free_nc; + allocator->constrain = gk20a_allocator_constrain; + + return 0; +} + +/* destroy allocator, free all remaining blocks if any */ +void gk20a_allocator_destroy(struct gk20a_allocator *allocator) +{ + struct gk20a_alloc_block *block, *next; + u32 free_count = 0; + + down_write(&allocator->rw_sema); + + for (block = allocator->block_first; block; ) { + allocator_dbg(allocator, "free remaining block %d:%d", + block->start, block->end); + next = block->next; + kmem_cache_free(allocator->block_cache, block); + free_count++; + block = next; + } + + up_write(&allocator->rw_sema); + + /* block_count doesn't match real number of blocks */ + BUG_ON(free_count != allocator->block_count); + + kmem_cache_destroy(allocator->block_cache); + + memset(allocator, 0, sizeof(struct gk20a_allocator)); +} + +/* + * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is + * returned to caller in *addr. + * + * contiguous allocation, which allocates one block of + * contiguous address. 
+*/ +int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, + u32 *addr, u32 len) +{ + int ret; +#if defined(ALLOCATOR_DEBUG) + struct gk20a_alloc_block *block; + bool should_fail = false; +#endif + + allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len); + + if (*addr + len > allocator->limit || /* check addr range */ + *addr & (allocator->align - 1) || /* check addr alignment */ + len == 0) /* check len */ + return -EINVAL; + + if (allocator->constraint.enable && + (*addr + len > allocator->constraint.limit || + *addr > allocator->constraint.base)) + return -EINVAL; + + len = ALIGN(len, allocator->align); + if (!len) + return -ENOMEM; + + down_write(&allocator->rw_sema); + +#if defined(ALLOCATOR_DEBUG) + if (*addr) { + for (block = allocator->block_first; + block; block = block->next) { + if (block->end > *addr && block->start < *addr + len) { + should_fail = true; + break; + } + } + } +#endif + + ret = block_alloc_single_locked(allocator, addr, len); + +#if defined(ALLOCATOR_DEBUG) + if (!ret) { + bool allocated = false; + BUG_ON(should_fail); + BUG_ON(*addr < allocator->base); + BUG_ON(*addr + len > allocator->limit); + for (block = allocator->block_first; + block; block = block->next) { + if (!block->nc_block && + block->start <= *addr && + block->end >= *addr + len) { + allocated = true; + break; + } + } + BUG_ON(!allocated); + } +#endif + + up_write(&allocator->rw_sema); + + allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len); + + return ret; +} + +/* + * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is + * returned to caller in *addr. + * + * non-contiguous allocation, which returns a list of blocks with aggregated + * size == len. Individual block size must meet alignment requirement. + */ +int gk20a_allocator_block_alloc_nc(struct gk20a_allocator *allocator, + u32 *addr, u32 len, struct gk20a_alloc_block **pblock) +{ + int ret; + + allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len); + + BUG_ON(pblock == NULL); + *pblock = NULL; + + if (*addr + len > allocator->limit || /* check addr range */ + *addr & (allocator->align - 1) || /* check addr alignment */ + len == 0) /* check len */ + return -EINVAL; + + len = ALIGN(len, allocator->align); + if (!len) + return -ENOMEM; + + down_write(&allocator->rw_sema); + + ret = block_alloc_list_locked(allocator, addr, len, pblock); + +#if defined(ALLOCATOR_DEBUG) + if (!ret) { + struct gk20a_alloc_block *block = *pblock; + BUG_ON(!block); + BUG_ON(block->start < allocator->base); + while (block->nc_next) { + BUG_ON(block->end > block->nc_next->start); + block = block->nc_next; + } + BUG_ON(block->end > allocator->limit); + } +#endif + + up_write(&allocator->rw_sema); + + allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len); + + return ret; +} + +/* free all blocks between start and end */ +int gk20a_allocator_block_free(struct gk20a_allocator *allocator, + u32 addr, u32 len) +{ + int ret; + + allocator_dbg(allocator, "[in] addr %d, len %d", addr, len); + + if (addr + len > allocator->limit || /* check addr range */ + addr < allocator->base || + addr & (allocator->align - 1)) /* check addr alignment */ + return -EINVAL; + + len = ALIGN(len, allocator->align); + if (!len) + return -EINVAL; + + down_write(&allocator->rw_sema); + + ret = block_free_locked(allocator, addr, len); + +#if defined(ALLOCATOR_DEBUG) + if (!ret) { + struct gk20a_alloc_block *block; + for (block = allocator->block_first; + block; block = block->next) { + if (!block->nc_block) + BUG_ON(block->start >= addr 
&& + block->end <= addr + len); + } + } +#endif + up_write(&allocator->rw_sema); + + allocator_dbg(allocator, "[out] addr %d, len %d", addr, len); + + return ret; +} + +/* free non-contiguous allocation block list */ +void gk20a_allocator_block_free_nc(struct gk20a_allocator *allocator, + struct gk20a_alloc_block *block) +{ + /* nothing to free */ + if (!block) + return; + + down_write(&allocator->rw_sema); + block_free_list_locked(allocator, block); + up_write(&allocator->rw_sema); +} + +#if defined(ALLOCATOR_DEBUG) + +#include + +/* test suite */ +void gk20a_allocator_test(void) +{ + struct gk20a_allocator allocator; + struct gk20a_alloc_block *list[5]; + u32 addr, len; + u32 count; + int n; + + gk20a_allocator_init(&allocator, "test", 0, 10, 1); + + /* alloc/free a single block in the beginning */ + addr = 0; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + gk20a_allocator_block_free(&allocator, addr, 2); + gk20a_allocator_dump(&allocator); + /* alloc/free a single block in the middle */ + addr = 4; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + gk20a_allocator_block_free(&allocator, addr, 2); + gk20a_allocator_dump(&allocator); + /* alloc/free a single block in the end */ + addr = 8; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + gk20a_allocator_block_free(&allocator, addr, 2); + gk20a_allocator_dump(&allocator); + + /* allocate contiguous blocks */ + addr = 0; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + addr = 0; + gk20a_allocator_block_alloc(&allocator, &addr, 4); + gk20a_allocator_dump(&allocator); + addr = 0; + gk20a_allocator_block_alloc(&allocator, &addr, 4); + gk20a_allocator_dump(&allocator); + + /* no free space */ + addr = 0; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + + /* free in the end */ + gk20a_allocator_block_free(&allocator, 8, 2); + gk20a_allocator_dump(&allocator); + /* free in the beginning */ + gk20a_allocator_block_free(&allocator, 0, 2); + gk20a_allocator_dump(&allocator); + /* free in the middle */ + gk20a_allocator_block_free(&allocator, 4, 2); + gk20a_allocator_dump(&allocator); + + /* merge case PPPPAAAANNNN */ + addr = 4; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + /* merge case ....AAAANNNN */ + addr = 0; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + /* merge case PPPPAAAA.... 
*/ + addr = 8; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + + /* test free across multiple blocks and split */ + gk20a_allocator_block_free(&allocator, 2, 2); + gk20a_allocator_dump(&allocator); + gk20a_allocator_block_free(&allocator, 6, 2); + gk20a_allocator_dump(&allocator); + gk20a_allocator_block_free(&allocator, 1, 8); + gk20a_allocator_dump(&allocator); + + /* test non-contiguous allocation */ + addr = 4; + gk20a_allocator_block_alloc(&allocator, &addr, 2); + gk20a_allocator_dump(&allocator); + addr = 0; + gk20a_allocator_block_alloc_nc(&allocator, &addr, 5, &list[0]); + gk20a_allocator_dump(&allocator); + gk20a_allocator_dump_nc_list(&allocator, list[0]); + + /* test free a range overlaping non-contiguous blocks */ + gk20a_allocator_block_free(&allocator, 2, 6); + gk20a_allocator_dump(&allocator); + + /* test non-contiguous free */ + gk20a_allocator_block_free_nc(&allocator, list[0]); + gk20a_allocator_dump(&allocator); + + gk20a_allocator_destroy(&allocator); + + /* random stress test */ + gk20a_allocator_init(&allocator, "test", 4096, 4096 * 1024, 4096); + for (;;) { + pr_debug("alloc tests...\n"); + for (count = 0; count < 50; count++) { + addr = 0; + len = random32() % (4096 * 1024 / 16); + gk20a_allocator_block_alloc(&allocator, &addr, len); + gk20a_allocator_dump(&allocator); + } + + pr_debug("free tests...\n"); + for (count = 0; count < 30; count++) { + addr = (random32() % (4096 * 1024)) & ~(4096 - 1); + len = random32() % (4096 * 1024 / 16); + gk20a_allocator_block_free(&allocator, addr, len); + gk20a_allocator_dump(&allocator); + } + + pr_debug("non-contiguous alloc tests...\n"); + for (n = 0; n < 5; n++) { + addr = 0; + len = random32() % (4096 * 1024 / 8); + gk20a_allocator_block_alloc_nc(&allocator, &addr, + len, &list[n]); + gk20a_allocator_dump(&allocator); + gk20a_allocator_dump_nc_list(&allocator, list[n]); + } + + pr_debug("free tests...\n"); + for (count = 0; count < 10; count++) { + addr = (random32() % (4096 * 1024)) & ~(4096 - 1); + len = random32() % (4096 * 1024 / 16); + gk20a_allocator_block_free(&allocator, addr, len); + gk20a_allocator_dump(&allocator); + } + + pr_debug("non-contiguous free tests...\n"); + for (n = 4; n >= 0; n--) { + gk20a_allocator_dump_nc_list(&allocator, list[n]); + gk20a_allocator_block_free_nc(&allocator, list[n]); + gk20a_allocator_dump(&allocator); + } + + pr_debug("fixed addr alloc tests...\n"); + for (count = 0; count < 10; count++) { + addr = (random32() % (4096 * 1024)) & ~(4096 - 1); + len = random32() % (4096 * 1024 / 32); + gk20a_allocator_block_alloc(&allocator, &addr, len); + gk20a_allocator_dump(&allocator); + } + + pr_debug("free tests...\n"); + for (count = 0; count < 10; count++) { + addr = (random32() % (4096 * 1024)) & ~(4096 - 1); + len = random32() % (4096 * 1024 / 16); + gk20a_allocator_block_free(&allocator, addr, len); + gk20a_allocator_dump(&allocator); + } + } + gk20a_allocator_destroy(&allocator); +} + +#endif /* ALLOCATOR_DEBUG */ + -- cgit v1.2.2
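
Editor's note: the heart of this allocator is the first-fit search performed by
find_free_area() over the address-sorted block list. The following is a minimal,
userspace-only sketch of that idea for readers who want to experiment outside the
kernel; the block array, function names and the exclusive "limit" convention are
illustrative assumptions, and the real driver additionally handles alignment, the
cached hole size and the first/last free-address caches. It is not the driver code.

/* gcc -Wall first_fit_sketch.c && ./a.out */
#include <stdio.h>

struct block { unsigned start, end; };	/* an allocated range [start, end) */

/* first-fit: find 'len' free units in [base, limit) not covered by blocks[],
 * which must be sorted by start address, as the rb tree/list keeps them */
static int find_free(const struct block *blocks, int nblocks,
		     unsigned base, unsigned limit, unsigned len,
		     unsigned *addr)
{
	unsigned cur = base;
	int i;

	for (i = 0; i < nblocks; i++) {
		if (cur + len <= blocks[i].start) {	/* hole before block i fits */
			*addr = cur;
			return 0;
		}
		if (blocks[i].end > cur)
			cur = blocks[i].end;		/* skip past the block */
	}
	if (cur + len <= limit) {			/* room after the last block */
		*addr = cur;
		return 0;
	}
	return -1;					/* no hole large enough */
}

int main(void)
{
	struct block blocks[] = { { 0, 2 }, { 4, 6 } };
	unsigned addr;

	if (!find_free(blocks, 2, 0, 10, 2, &addr))
		printf("len 2 fits at %u\n", addr);	/* 2: hole between the blocks */
	if (!find_free(blocks, 2, 0, 10, 4, &addr))
		printf("len 4 fits at %u\n", addr);	/* 6: hole after the last block */
	return 0;
}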