From a2e852364582e9c337f52bc53ccc33877c8f3b47 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Wed, 18 Mar 2015 13:33:09 -0700 Subject: gpu: nvgpu: New allocator for VA space Implement a new buddy allocation scheme for the GPU's VA space. The bitmap allocator was using too much memory and is not a scaleable solution as the GPU's address space keeps getting bigger. The buddy allocation scheme is much more memory efficient when the majority of the address space is not allocated. The buddy allocator is not constrained by the notion of a split address space. The bitmap allocator could only manage either small pages or large pages but not both at the same time. Thus the bottom of the address space was for small pages, the top for large pages. Although, that split is not removed quite yet, the new allocator enables that to happen. The buddy allocator is also very scalable. It manages the relatively small comptag space to the enormous GPU VA space and everything in between. This is important since the GPU has lots of different sized spaces that need managing. Currently there are certain limitations. For one the allocator does not handle the fixed allocations from CUDA very well. It can do so but with certain caveats. The PTE page size is always set to small. This means the BA may place other small page allocations in the buddies around the fixed allocation. It does this to avoid having large and small page allocations in the same PDE. Change-Id: I501cd15af03611536490137331d43761c402c7f9 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/740694 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | 1167 +++++++++++++++++++++++++++-- 1 file changed, 1102 insertions(+), 65 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a/gk20a_allocator.c') diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c index 0037257c..56fb22df 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c @@ -1,7 +1,7 @@ /* * gk20a allocator * - * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -16,112 +16,1149 @@ * along with this program. If not, see . */ +#include +#include +#include +#include + +#include "platform_gk20a.h" #include "gk20a_allocator.h" -#include -/* init allocator struct */ -int gk20a_allocator_init(struct gk20a_allocator *allocator, - const char *name, u32 start, u32 len) +#include "mm_gk20a.h" + +static struct dentry *balloc_debugfs_root; + +static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ + +static u32 balloc_tracing_on; + +#define balloc_trace_func() \ + do { \ + if (balloc_tracing_on) \ + trace_printk("%s\n", __func__); \ + } while (0) + +#define balloc_trace_func_done() \ + do { \ + if (balloc_tracing_on) \ + trace_printk("%s_done\n", __func__); \ + } while (0) + + +static void balloc_init_alloc_debug(struct gk20a_allocator *a); +static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, + int lock); +static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, + u64 addr); +static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b); +static void __balloc_do_free_fixed(struct gk20a_allocator *a, + struct gk20a_fixed_alloc *falloc); + +/* + * This function is not present in older kernel's list.h code. + */ +#ifndef list_last_entry +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) +#endif + +/* + * GPU buddy allocator for various address spaces. + * + * Current limitations: + * o A fixed allocation could potentially be made that borders PDEs with + * different PTE sizes. This would require that fixed buffer to have + * different sized PTEs for different parts of the allocation. Probably + * best to just require PDE alignment for fixed address allocs. + * + * o It is currently possible to make an allocator that has a buddy alignment + * out of sync with the PDE block size alignment. A simple example is a + * 32GB address space starting at byte 1. Every buddy is shifted off by 1 + * which means each buddy corresponf to more than one actual GPU page. The + * best way to fix this is probably just require PDE blocksize alignment + * for the start of the address space. At the moment all allocators are + * easily PDE aligned so this hasn't been a problem. + */ + +/* + * Pick a suitable maximum order for this allocator. + * + * Hueristic: Just guessing that the best max order is the largest single + * block that will fit in the address space. + */ +static void balloc_compute_max_order(struct gk20a_allocator *a) +{ + u64 true_max_order = ilog2(a->blks); + + if (a->max_order > true_max_order) + a->max_order = true_max_order; + if (a->max_order > GPU_BALLOC_MAX_ORDER) + a->max_order = GPU_BALLOC_MAX_ORDER; +} + +/* + * Since we can only allocate in chucks of a->blk_size we need to trim off + * any excess data that is not aligned to a->blk_size. + */ +static void balloc_allocator_align(struct gk20a_allocator *a) +{ + a->start = ALIGN(a->base, a->blk_size); + a->end = (a->base + a->length) & ~(a->blk_size - 1); + a->count = a->end - a->start; + a->blks = a->count >> a->blk_shift; +} + +/* + * Pass NULL for parent if you want a top level buddy. + */ +static struct gk20a_buddy *balloc_new_buddy(struct gk20a_allocator *a, + struct gk20a_buddy *parent, + u64 start, u64 order) +{ + struct gk20a_buddy *new_buddy; + + new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL); + if (!new_buddy) + return NULL; + + memset(new_buddy, 0, sizeof(struct gk20a_buddy)); + + new_buddy->parent = parent; + new_buddy->start = start; + new_buddy->order = order; + new_buddy->end = start + (1 << order) * a->blk_size; + + return new_buddy; +} + +static void __balloc_buddy_list_add(struct gk20a_allocator *a, + struct gk20a_buddy *b, + struct list_head *list) +{ + if (buddy_is_in_list(b)) { + balloc_dbg(a, "Oops: adding added buddy (%llu:0x%llx)\n", + b->order, b->start); + BUG(); + } + + /* + * Add big PTE blocks to the tail, small to the head for GVA spaces. + * This lets the code that checks if there are available blocks check + * without cycling through the entire list. + */ + if (a->flags & GPU_BALLOC_GVA_SPACE && + b->pte_size == BALLOC_PTE_SIZE_BIG) + list_add_tail(&b->buddy_entry, list); + else + list_add(&b->buddy_entry, list); + + buddy_set_in_list(b); +} + +static void __balloc_buddy_list_rem(struct gk20a_allocator *a, + struct gk20a_buddy *b) +{ + if (!buddy_is_in_list(b)) { + balloc_dbg(a, "Oops: removing removed buddy (%llu:0x%llx)\n", + b->order, b->start); + BUG(); + } + + list_del_init(&b->buddy_entry); + buddy_clr_in_list(b); +} + +/* + * Add a buddy to one of the buddy lists and deal with the necessary + * book keeping. Adds the buddy to the list specified by the buddy's order. + */ +static void balloc_blist_add(struct gk20a_allocator *a, struct gk20a_buddy *b) +{ + __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); + a->buddy_list_len[b->order]++; +} + +static void balloc_blist_rem(struct gk20a_allocator *a, struct gk20a_buddy *b) +{ + __balloc_buddy_list_rem(a, b); + a->buddy_list_len[b->order]--; +} + +static u64 balloc_get_order(struct gk20a_allocator *a, u64 len) +{ + if (len == 0) + return 0; + + len--; + len >>= a->blk_shift; + + return fls(len); +} + +static u64 __balloc_max_order_in(struct gk20a_allocator *a, u64 start, u64 end) +{ + u64 size = (end - start) >> a->blk_shift; + + if (size > 0) + return min_t(u64, ilog2(size), a->max_order); + else + return GPU_BALLOC_MAX_ORDER; +} + +/* + * Initialize the buddy lists. + */ +static int balloc_init_lists(struct gk20a_allocator *a) +{ + int i; + u64 bstart, bend, order; + struct gk20a_buddy *buddy; + + bstart = a->start; + bend = a->end; + + /* First make sure the LLs are valid. */ + for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) + INIT_LIST_HEAD(balloc_get_order_list(a, i)); + + while (bstart < bend) { + order = __balloc_max_order_in(a, bstart, bend); + + buddy = balloc_new_buddy(a, NULL, bstart, order); + if (!buddy) + goto cleanup; + + balloc_blist_add(a, buddy); + bstart += balloc_order_to_len(a, order); + } + + return 0; + +cleanup: + for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { + if (!list_empty(balloc_get_order_list(a, i))) { + buddy = list_first_entry(balloc_get_order_list(a, i), + struct gk20a_buddy, buddy_entry); + balloc_blist_rem(a, buddy); + kmem_cache_free(buddy_cache, buddy); + } + } + + return -ENOMEM; +} + +/* + * Initialize a buddy allocator. Returns 0 on success. This allocator does + * not necessarily manage bytes. It manages distinct ranges of resources. This + * allows the allocator to work for things like comp_tags, semaphores, etc. + * + * @allocator: Ptr to an allocator struct to init. + * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to + * get PTE size for GVA spaces. + * @name: Name of the allocator. Doesn't have to be static storage. + * @base: The base address of the resource pool being managed. + * @size: Number of resources in the pool. + * @blk_size: Minimum number of resources to allocate at once. For things like + * semaphores this is 1. For GVA this might be as much as 64k. This + * corresponds to order 0. Must be power of 2. + * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator + * will try and pick a reasonable max order. + * @flags: Extra flags necessary. See GPU_BALLOC_*. + */ +int __gk20a_allocator_init(struct gk20a_allocator *a, + struct vm_gk20a *vm, const char *name, + u64 base, u64 size, u64 blk_size, u64 max_order, + u64 flags) { - memset(allocator, 0, sizeof(struct gk20a_allocator)); + int err; + + memset(a, 0, sizeof(struct gk20a_allocator)); + strncpy(a->name, name, 32); + + a->base = base; + a->length = size; + a->blk_size = blk_size; + a->blk_shift = __ffs(blk_size); + + /* blk_size must be greater than 0 and a power of 2. */ + if (blk_size == 0) + return -EINVAL; + if (blk_size & (blk_size - 1)) + return -EINVAL; + + if (max_order > GPU_BALLOC_MAX_ORDER) + return -EINVAL; + + /* If this is to manage a GVA space we need a VM. */ + if (flags & GPU_BALLOC_GVA_SPACE && !vm) + return -EINVAL; + + a->vm = vm; + if (flags & GPU_BALLOC_GVA_SPACE) + a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10); - strncpy(allocator->name, name, 32); + a->flags = flags; + a->max_order = max_order; - allocator->base = start; - allocator->limit = start + len - 1; + balloc_allocator_align(a); + balloc_compute_max_order(a); - allocator->bitmap = vzalloc(BITS_TO_LONGS(len) * sizeof(long)); - if (!allocator->bitmap) + /* Shared buddy kmem_cache for all allocators. */ + if (!buddy_cache) + buddy_cache = KMEM_CACHE(gk20a_buddy, 0); + if (!buddy_cache) return -ENOMEM; - allocator_dbg(allocator, "%s : base %d, limit %d", - allocator->name, allocator->base); + a->alloced_buddies = RB_ROOT; + err = balloc_init_lists(a); + if (err) + return err; - init_rwsem(&allocator->rw_sema); + mutex_init(&a->lock); - allocator->alloc = gk20a_allocator_block_alloc; - allocator->free = gk20a_allocator_block_free; + a->init = 1; + + balloc_init_alloc_debug(a); + balloc_dbg(a, "New allocator: base 0x%llx\n", a->base); + balloc_dbg(a, " size 0x%llx\n", a->length); + balloc_dbg(a, " blk_size 0x%llx\n", a->blk_size); + balloc_dbg(a, " max_order %llu\n", a->max_order); + balloc_dbg(a, " flags 0x%llx\n", a->flags); return 0; } -/* destroy allocator, free all remaining blocks if any */ -void gk20a_allocator_destroy(struct gk20a_allocator *allocator) +int gk20a_allocator_init(struct gk20a_allocator *a, const char *name, + u64 base, u64 size, u64 blk_size) +{ + return __gk20a_allocator_init(a, NULL, name, + base, size, blk_size, 0, 0); +} + +/* + * Clean up and destroy the passed allocator. + */ +void gk20a_allocator_destroy(struct gk20a_allocator *a) { - down_write(&allocator->rw_sema); + struct rb_node *node; + struct gk20a_buddy *bud; + struct gk20a_fixed_alloc *falloc; + int i; + + balloc_lock(a); + + if (!IS_ERR_OR_NULL(a->debugfs_entry)) + debugfs_remove(a->debugfs_entry); + + /* + * Free the fixed allocs first. + */ + while ((node = rb_first(&a->fixed_allocs)) != NULL) { + falloc = container_of(node, + struct gk20a_fixed_alloc, alloced_entry); + + __balloc_do_free_fixed(a, falloc); + rb_erase(node, &a->fixed_allocs); + } + + /* + * And now free all outstanding allocations. + */ + while ((node = rb_first(&a->alloced_buddies)) != NULL) { + bud = container_of(node, struct gk20a_buddy, alloced_entry); + balloc_free_buddy(a, bud->start); + balloc_blist_add(a, bud); + balloc_coalesce(a, bud); + } - vfree(allocator->bitmap); + /* + * Now clean up the unallocated buddies. + */ + for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { + BUG_ON(a->buddy_list_alloced[i] != 0); + + while (!list_empty(balloc_get_order_list(a, i))) { + bud = list_first_entry(balloc_get_order_list(a, i), + struct gk20a_buddy, buddy_entry); + balloc_blist_rem(a, bud); + kmem_cache_free(buddy_cache, bud); + } + + if (a->buddy_list_len[i] != 0) { + pr_info("Excess buddies!!! (%d: %llu)\n", + i, a->buddy_list_len[i]); + BUG(); + } + if (a->buddy_list_split[i] != 0) { + pr_info("Excess split nodes!!! (%d: %llu)\n", + i, a->buddy_list_split[i]); + BUG(); + } + if (a->buddy_list_alloced[i] != 0) { + pr_info("Excess alloced nodes!!! (%d: %llu)\n", + i, a->buddy_list_alloced[i]); + BUG(); + } + } - memset(allocator, 0, sizeof(struct gk20a_allocator)); + a->init = 0; + + balloc_unlock(a); + + /* + * We cant unlock an allocator after memsetting it. That wipes the + * state of the mutex. Hopefully no one uses the allocator after + * destroying it... + */ + memset(a, 0, sizeof(struct gk20a_allocator)); } /* - * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is - * returned to caller in *addr. + * Combine the passed buddy if possible. The pointer in @b may not be valid + * after this as the buddy may be freed. * - * contiguous allocation, which allocates one block of - * contiguous address. -*/ -int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, - u32 *addr, u32 len, u32 align) + * @a must be locked. + */ +static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b) { - unsigned long _addr; + struct gk20a_buddy *parent; - allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len); + if (buddy_is_alloced(b) || buddy_is_split(b)) + return; - if ((*addr != 0 && *addr < allocator->base) || /* check addr range */ - *addr + len > allocator->limit || /* check addr range */ - *addr & (align - 1) || /* check addr alignment */ - len == 0) /* check len */ - return -EINVAL; + /* + * If both our buddy and I are both not allocated and not split then + * we can coalesce ourselves. + */ + if (!b->buddy) + return; + if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy)) + return; + + parent = b->parent; + + balloc_blist_rem(a, b); + balloc_blist_rem(a, b->buddy); + + buddy_clr_split(parent); + a->buddy_list_split[parent->order]--; + balloc_blist_add(a, parent); + + /* + * Recursively coalesce as far as we can go. + */ + balloc_coalesce(a, parent); + + /* Clean up the remains. */ + kmem_cache_free(buddy_cache, b->buddy); + kmem_cache_free(buddy_cache, b); +} + +/* + * Split a buddy into two new buddies who are 1/2 the size of the parent buddy. + * + * @a must be locked. + */ +static int balloc_split_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b, + int pte_size) +{ + struct gk20a_buddy *left, *right; + u64 half; - len = ALIGN(len, align); - if (!len) + left = balloc_new_buddy(a, b, b->start, b->order - 1); + if (!left) return -ENOMEM; - down_write(&allocator->rw_sema); + half = (b->end - b->start) / 2; - _addr = bitmap_find_next_zero_area(allocator->bitmap, - allocator->limit - allocator->base + 1, - *addr ? (*addr - allocator->base) : 0, - len, - align - 1); - if ((_addr > allocator->limit - allocator->base + 1) || - (*addr && *addr != (_addr + allocator->base))) { - up_write(&allocator->rw_sema); + right = balloc_new_buddy(a, b, b->start + half, b->order - 1); + if (!right) { + kmem_cache_free(buddy_cache, left); return -ENOMEM; } - bitmap_set(allocator->bitmap, _addr, len); - *addr = allocator->base + _addr; + buddy_set_split(b); + a->buddy_list_split[b->order]++; - up_write(&allocator->rw_sema); + b->left = left; + b->right = right; + left->buddy = right; + right->buddy = left; + left->parent = b; + right->parent = b; - allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len); + /* PTE considerations. */ + if (a->flags & GPU_BALLOC_GVA_SPACE && + left->order <= a->pte_blk_order) { + left->pte_size = pte_size; + right->pte_size = pte_size; + } + + balloc_blist_rem(a, b); + balloc_blist_add(a, left); + balloc_blist_add(a, right); return 0; } -/* free all blocks between start and end */ -int gk20a_allocator_block_free(struct gk20a_allocator *allocator, - u32 addr, u32 len, u32 align) +/* + * Place the passed buddy into the RB tree for allocated buddies. Never fails + * unless the passed entry is a duplicate which is a bug. + * + * @a must be locked. + */ +void balloc_alloc_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b) { - allocator_dbg(allocator, "[in] addr %d, len %d", addr, len); + struct rb_node **new = &(a->alloced_buddies.rb_node); + struct rb_node *parent = NULL; - if (addr + len > allocator->limit || /* check addr range */ - addr < allocator->base || - addr & (align - 1)) /* check addr alignment */ - return -EINVAL; + while (*new) { + struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy, + alloced_entry); - len = ALIGN(len, align); - if (!len) - return -EINVAL; + parent = *new; + if (b->start < bud->start) + new = &((*new)->rb_left); + else if (b->start > bud->start) + new = &((*new)->rb_right); + else + BUG_ON("Duplicate entries in allocated list!\n"); + } + + rb_link_node(&b->alloced_entry, parent, new); + rb_insert_color(&b->alloced_entry, &a->alloced_buddies); + + buddy_set_alloced(b); + a->buddy_list_alloced[b->order]++; +} + +/* + * Remove the passed buddy from the allocated buddy RB tree. Returns the + * deallocated buddy for further processing. + * + * @a must be locked. + */ +static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, + u64 addr) +{ + struct rb_node *node = a->alloced_buddies.rb_node; + struct gk20a_buddy *bud; + + while (node) { + bud = container_of(node, struct gk20a_buddy, alloced_entry); + + if (addr < bud->start) + node = node->rb_left; + else if (addr > bud->start) + node = node->rb_right; + else + break; + } + + if (!node) + return NULL; + + rb_erase(node, &a->alloced_buddies); + buddy_clr_alloced(bud); + a->buddy_list_alloced[bud->order]--; + + return bud; +} + +/* + * Find a suitable buddy for the given order and PTE type (big or little). + */ +static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_allocator *a, + u64 order, int pte_size) +{ + struct gk20a_buddy *bud; + + if (list_empty(balloc_get_order_list(a, order))) + return NULL; + + if (a->flags & GPU_BALLOC_GVA_SPACE && + pte_size == BALLOC_PTE_SIZE_BIG) + bud = list_last_entry(balloc_get_order_list(a, order), + struct gk20a_buddy, buddy_entry); + else + bud = list_first_entry(balloc_get_order_list(a, order), + struct gk20a_buddy, buddy_entry); + + if (bud->pte_size != BALLOC_PTE_SIZE_ANY && + bud->pte_size != pte_size) + return NULL; + + return bud; +} + +/* + * Allocate a suitably sized buddy. If no suitable buddy exists split higher + * order buddies until we have a suitable buddy to allocate. + * + * For PDE grouping add an extra check to see if a buddy is suitable: that the + * buddy exists in a PDE who's PTE size is reasonable + * + * @a must be locked. + */ +static u64 __balloc_do_alloc(struct gk20a_allocator *a, u64 order, int pte_size) +{ + u64 split_order; + struct gk20a_buddy *bud; + + split_order = order; + while (!(bud = __balloc_find_buddy(a, split_order, pte_size))) + split_order++; + + while (bud->order != order) { + if (balloc_split_buddy(a, bud, pte_size)) + return 0; /* No mem... */ + bud = bud->left; + } + + balloc_blist_rem(a, bud); + balloc_alloc_buddy(a, bud); - down_write(&allocator->rw_sema); - bitmap_clear(allocator->bitmap, addr - allocator->base, len); - up_write(&allocator->rw_sema); + return bud->start; +} + +/* + * Allocate memory from the passed allocator. + */ +u64 gk20a_balloc(struct gk20a_allocator *a, u64 len) +{ + u64 order, addr; + int pte_size; + + balloc_trace_func(); + + balloc_lock(a); + + order = balloc_get_order(a, len); + + if (order > a->max_order) { + balloc_unlock(a); + balloc_dbg(a, "Alloc fail\n"); + balloc_trace_func_done(); + return 0; + } + + /* + * For now pass the base address of the allocator's region to + * __get_pte_size(). This ensures we get the right page size for + * the alloc but we don't have to know what the real address is + * going to be quite yet. + * + * TODO: once userspace supports a unified address space pass 0 for + * the base. This will make only 'len' affect the PTE size. + */ + if (a->flags & GPU_BALLOC_GVA_SPACE) + pte_size = __get_pte_size(a->vm, a->base, len); + else + pte_size = BALLOC_PTE_SIZE_ANY; + + addr = __balloc_do_alloc(a, order, pte_size); + + a->bytes_alloced += len; + a->bytes_alloced_real += balloc_order_to_len(a, order); + + balloc_unlock(a); + balloc_dbg(a, "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", + addr, order, len, + pte_size == gmmu_page_size_big ? "big" : + pte_size == gmmu_page_size_small ? "small" : + "NA/any"); + + balloc_trace_func_done(); + return addr; +} + +/* + * See if the passed range is actually available for allocation. If so, then + * return 1, otherwise return 0. + * + * TODO: Right now this uses the unoptimal approach of going through all + * outstanding allocations and checking their base/ends. This could be better. + */ +static int balloc_is_range_free(struct gk20a_allocator *a, u64 base, u64 end) +{ + struct rb_node *node; + struct gk20a_buddy *bud; + + node = rb_first(&a->alloced_buddies); + if (!node) + return 1; /* No allocs yet. */ + + bud = container_of(node, struct gk20a_buddy, alloced_entry); + + while (bud->start < end) { + if ((bud->start > base && bud->start < end) || + (bud->end > base && bud->end < end)) + return 0; + + node = rb_next(node); + if (!node) + break; + bud = container_of(node, struct gk20a_buddy, alloced_entry); + } + + return 1; +} + +static void balloc_alloc_fixed(struct gk20a_allocator *a, + struct gk20a_fixed_alloc *f) +{ + struct rb_node **new = &(a->fixed_allocs.rb_node); + struct rb_node *parent = NULL; + + while (*new) { + struct gk20a_fixed_alloc *falloc = + container_of(*new, struct gk20a_fixed_alloc, + alloced_entry); + + parent = *new; + if (f->start < falloc->start) + new = &((*new)->rb_left); + else if (f->start > falloc->start) + new = &((*new)->rb_right); + else + BUG_ON("Duplicate entries in allocated list!\n"); + } + + rb_link_node(&f->alloced_entry, parent, new); + rb_insert_color(&f->alloced_entry, &a->fixed_allocs); +} + +/* + * Remove the passed buddy from the allocated buddy RB tree. Returns the + * deallocated buddy for further processing. + * + * @a must be locked. + */ +static struct gk20a_fixed_alloc *balloc_free_fixed(struct gk20a_allocator *a, + u64 addr) +{ + struct rb_node *node = a->fixed_allocs.rb_node; + struct gk20a_fixed_alloc *falloc; + + while (node) { + falloc = container_of(node, + struct gk20a_fixed_alloc, alloced_entry); + + if (addr < falloc->start) + node = node->rb_left; + else if (addr > falloc->start) + node = node->rb_right; + else + break; + } + + if (!node) + return NULL; + + rb_erase(node, &a->fixed_allocs); + + return falloc; +} + +/* + * Find the parent range - doesn't necessarily need the parent to actually exist + * as a buddy. Finding an existing parent comes later... + */ +static void __balloc_get_parent_range(struct gk20a_allocator *a, + u64 base, u64 order, + u64 *pbase, u64 *porder) +{ + u64 base_mask; + u64 shifted_base = balloc_base_shift(a, base); + + order++; + base_mask = ~((a->blk_size << order) - 1); + + shifted_base &= base_mask; + + *pbase = balloc_base_unshift(a, shifted_base); + *porder = order; +} + +/* + * Makes a buddy at the passed address. This will make all parent buddies + * necessary for this buddy to exist as well. + */ +static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a, + u64 base, u64 order) +{ + struct gk20a_buddy *bud = NULL; + struct list_head *order_list; + u64 cur_order = order, cur_base = base; + + /* + * Algo: + * 1. Keep jumping up a buddy order until we find the real buddy that + * this buddy exists in. + * 2. Then work our way down through the buddy tree until we hit a dead + * end. + * 3. Start splitting buddies until we split to the one we need to + * make. + */ + while (cur_order <= a->max_order) { + int found = 0; + + order_list = balloc_get_order_list(a, cur_order); + list_for_each_entry(bud, order_list, buddy_entry) { + if (bud->start == cur_base) { + found = 1; + break; + } + } + + if (found) + break; + + __balloc_get_parent_range(a, cur_base, cur_order, + &cur_base, &cur_order); + } + + if (cur_order > a->max_order) { + balloc_dbg(a, "No buddy for range ???\n"); + return NULL; + } + + /* Split this buddy as necessary until we get the target buddy. */ + while (bud->start != base || bud->order != order) { + if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { + balloc_coalesce(a, bud); + return NULL; + } + + if (base < bud->right->start) + bud = bud->left; + else + bud = bud->right; + + } + + return bud; +} + +static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a, + struct gk20a_fixed_alloc *falloc, + u64 base, u64 len) +{ + u64 shifted_base, inc_base; + u64 align_order; + + shifted_base = balloc_base_shift(a, base); + if (shifted_base == 0) + align_order = __fls(len >> a->blk_shift); + else + align_order = min_t(u64, + __ffs(shifted_base >> a->blk_shift), + __fls(len >> a->blk_shift)); + + if (align_order > a->max_order) { + balloc_dbg(a, "Align order too big: %llu > %llu\n", + align_order, a->max_order); + return 0; + } + + /* + * Generate a list of buddies that satisfy this allocation. + */ + inc_base = shifted_base; + while (inc_base < (shifted_base + len)) { + u64 order_len = balloc_order_to_len(a, align_order); + u64 remaining; + struct gk20a_buddy *bud; + + bud = __balloc_make_fixed_buddy(a, + balloc_base_unshift(a, inc_base), + align_order); + if (!bud) { + balloc_dbg(a, "Fixed buddy failed: {0x%llx, %llu}!\n", + balloc_base_unshift(a, inc_base), + align_order); + goto err_and_cleanup; + } + + balloc_blist_rem(a, bud); + balloc_alloc_buddy(a, bud); + __balloc_buddy_list_add(a, bud, &falloc->buddies); + + /* Book keeping. */ + inc_base += order_len; + remaining = (shifted_base + len) - inc_base; + align_order = __ffs(inc_base >> a->blk_shift); + + /* If we don't have much left - trim down align_order. */ + if (balloc_order_to_len(a, align_order) > remaining) + align_order = __balloc_max_order_in(a, inc_base, + inc_base + remaining); + } + + return base; - allocator_dbg(allocator, "[out] addr %d, len %d", addr, len); +err_and_cleanup: + while (!list_empty(&falloc->buddies)) { + struct gk20a_buddy *bud = list_first_entry(&falloc->buddies, + struct gk20a_buddy, + buddy_entry); + + __balloc_buddy_list_rem(a, bud); + balloc_free_buddy(a, bud->start); + kmem_cache_free(buddy_cache, bud); + } + + return 0; +} + +/* + * Allocate a fixed address allocation. The address of the allocation is @base + * and the length is @len. This is not a typical buddy allocator operation and + * as such has a high posibility of failure if the address space is heavily in + * use. + * + * Please do not use this function unless _absolutely_ necessary. + */ +u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) +{ + struct gk20a_fixed_alloc *falloc = NULL; + struct gk20a_buddy *bud; + u64 ret, real_bytes = 0; + + balloc_trace_func(); + + /* If base isn't aligned to an order 0 block, fail. */ + if (base & (a->blk_size - 1)) + goto fail; + + if (len == 0) + goto fail; + + falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); + if (!falloc) + goto fail; + + INIT_LIST_HEAD(&falloc->buddies); + falloc->start = base; + falloc->end = base + len; + + balloc_lock(a); + if (!balloc_is_range_free(a, base, base + len)) { + balloc_dbg(a, "Range not free: 0x%llx -> 0x%llx\n", + base, base + len); + goto fail_unlock; + } + + ret = __balloc_do_alloc_fixed(a, falloc, base, len); + if (!ret) { + balloc_dbg(a, "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", + base, base + len); + goto fail_unlock; + } + + balloc_alloc_fixed(a, falloc); + + list_for_each_entry(bud, &falloc->buddies, buddy_entry) + real_bytes += (bud->end - bud->start); + + a->bytes_alloced += len; + a->bytes_alloced_real += real_bytes; + + balloc_unlock(a); + balloc_dbg(a, "Alloc (fixed) 0x%llx\n", base); + + balloc_trace_func_done(); + return base; + +fail_unlock: + balloc_unlock(a); +fail: + kfree(falloc); + balloc_trace_func_done(); + return 0; +} + +static void __balloc_do_free_fixed(struct gk20a_allocator *a, + struct gk20a_fixed_alloc *falloc) +{ + struct gk20a_buddy *bud; + + while (!list_empty(&falloc->buddies)) { + bud = list_first_entry(&falloc->buddies, + struct gk20a_buddy, + buddy_entry); + __balloc_buddy_list_rem(a, bud); + + balloc_free_buddy(a, bud->start); + balloc_blist_add(a, bud); + a->bytes_freed += balloc_order_to_len(a, bud->order); + + /* + * Attemp to defrag the allocation. + */ + balloc_coalesce(a, bud); + } + + kfree(falloc); +} + +/* + * Free the passed allocation. + */ +void gk20a_bfree(struct gk20a_allocator *a, u64 addr) +{ + struct gk20a_buddy *bud; + struct gk20a_fixed_alloc *falloc; + + balloc_trace_func(); + + if (!addr) { + balloc_trace_func_done(); + return; + } + + balloc_lock(a); + + /* + * First see if this is a fixed alloc. If not fall back to a regular + * buddy. + */ + falloc = balloc_free_fixed(a, addr); + if (falloc) { + __balloc_do_free_fixed(a, falloc); + goto done; + } + + bud = balloc_free_buddy(a, addr); + if (!bud) + goto done; + + balloc_blist_add(a, bud); + a->bytes_freed += balloc_order_to_len(a, bud->order); + + /* + * Attemp to defrag the allocation. + */ + balloc_coalesce(a, bud); + +done: + balloc_unlock(a); + balloc_dbg(a, "Free 0x%llx\n", addr); + balloc_trace_func_done(); + return; +} + +/* + * Print the buddy allocator top level stats. If you pass @s as NULL then the + * stats are printed to the kernel log. This lets this code be used for + * debugging purposes internal to the allocator. + */ +static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, + int lock) +{ +#define __balloc_pstat(s, fmt, arg...) \ + do { \ + if (s) \ + seq_printf(s, fmt, ##arg); \ + else \ + balloc_dbg(a, fmt, ##arg); \ + } while (0) + + int i; + struct rb_node *node; + struct gk20a_fixed_alloc *falloc; + + __balloc_pstat(s, "base = %llu, limit = %llu, blk_size = %llu\n", + a->base, a->length, a->blk_size); + __balloc_pstat(s, "Internal params:\n"); + __balloc_pstat(s, " start = %llu\n", a->start); + __balloc_pstat(s, " end = %llu\n", a->end); + __balloc_pstat(s, " count = %llu\n", a->count); + __balloc_pstat(s, " blks = %llu\n", a->blks); + __balloc_pstat(s, " max_order = %llu\n", a->max_order); + + __balloc_pstat(s, "Buddy blocks:\n"); + __balloc_pstat(s, " Order Free Alloced Split\n"); + __balloc_pstat(s, " ----- ---- ------- -----\n"); + + if (lock) + balloc_lock(a); + for (i = a->max_order; i >= 0; i--) { + if (a->buddy_list_len[i] == 0 && + a->buddy_list_alloced[i] == 0 && + a->buddy_list_split[i] == 0) + continue; + + __balloc_pstat(s, " %3d %-7llu %-9llu %llu\n", i, + a->buddy_list_len[i], + a->buddy_list_alloced[i], + a->buddy_list_split[i]); + } + + __balloc_pstat(s, "\n"); + + for (node = rb_first(&a->fixed_allocs), i = 1; + node != NULL; + node = rb_next(node)) { + falloc = container_of(node, + struct gk20a_fixed_alloc, alloced_entry); + + __balloc_pstat(s, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", + i, falloc->start, falloc->end); + } + + __balloc_pstat(s, "\n"); + __balloc_pstat(s, "Bytes allocated: %llu\n", a->bytes_alloced); + __balloc_pstat(s, "Bytes allocated (real): %llu\n", + a->bytes_alloced_real); + __balloc_pstat(s, "Bytes freed: %llu\n", a->bytes_freed); + + if (lock) + balloc_unlock(a); + +#undef __balloc_pstats +} + +static int __alloc_show(struct seq_file *s, void *unused) +{ + struct gk20a_allocator *a = s->private; + + balloc_print_stats(a, s, 1); return 0; } + +static int __alloc_open(struct inode *inode, struct file *file) +{ + return single_open(file, __alloc_show, inode->i_private); +} + +static const struct file_operations __alloc_fops = { + .open = __alloc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void balloc_init_alloc_debug(struct gk20a_allocator *a) +{ + if (!balloc_debugfs_root) + return; + + a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, + balloc_debugfs_root, + a, &__alloc_fops); +} + +void gk20a_alloc_debugfs_init(struct platform_device *pdev) +{ + struct gk20a_platform *platform = platform_get_drvdata(pdev); + struct dentry *gpu_root = platform->debugfs; + + balloc_debugfs_root = debugfs_create_dir("allocators", gpu_root); + if (IS_ERR_OR_NULL(balloc_debugfs_root)) + return; + + debugfs_create_u32("tracing", 0664, balloc_debugfs_root, + &balloc_tracing_on); +} -- cgit v1.2.2