From 5672cbdf6d8e7b8b93a08cd388097e2d1f0a8843 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Mon, 27 Jun 2016 17:46:02 -0700 Subject: gpu: nvgpu: Move buddy allocator to new file Move the buddy allocator implementation to a new file to make the code more organized. Also, as part of this, commonize some macros and functions which will be used by future allocator implementations. Bug 1781897 Change-Id: I1611534d5d872bf3b4677f7a1cc024a94b1c437e Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1172116 Reviewed-by: Yu-Huan Hsu --- drivers/gpu/nvgpu/Makefile | 1 + drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h | 190 ++++ drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | 1216 +---------------------- drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | 200 +--- drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c | 1187 ++++++++++++++++++++++ 5 files changed, 1423 insertions(+), 1371 deletions(-) create mode 100644 drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h create mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 0fdd2e28..a9a0f92f 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -53,6 +53,7 @@ nvgpu-y := \ gk20a/hal.o \ gk20a/hal_gk20a.o \ gk20a/gk20a_allocator.o \ + gk20a/gk20a_allocator_buddy.o \ gk20a/cde_gk20a.o \ gk20a/platform_gk20a_generic.o \ gk20a/tsg_gk20a.o \ diff --git a/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h new file mode 100644 index 00000000..bed0492c --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef BUDDY_ALLOCATOR_PRIV_H +#define BUDDY_ALLOCATOR_PRIV_H + +#include +#include + +struct gk20a_allocator; +struct vm_gk20a; + +/* + * Each buddy is an element in a binary tree. + */ +struct gk20a_buddy { + struct gk20a_buddy *parent; /* Parent node. */ + struct gk20a_buddy *buddy; /* This node's buddy. */ + struct gk20a_buddy *left; /* Lower address sub-node. */ + struct gk20a_buddy *right; /* Higher address sub-node. */ + + struct list_head buddy_entry; /* List entry for various lists. */ + struct rb_node alloced_entry; /* RB tree of allocations. */ + + u64 start; /* Start address of this buddy. */ + u64 end; /* End address of this buddy. */ + u64 order; /* Buddy order. */ + +#define BALLOC_BUDDY_ALLOCED 0x1 +#define BALLOC_BUDDY_SPLIT 0x2 +#define BALLOC_BUDDY_IN_LIST 0x4 + int flags; /* List of associated flags. */ + + /* + * Size of the PDE this buddy is using. This allows for grouping like + * sized allocations into the same PDE. 
+ */ +#define BALLOC_PTE_SIZE_ANY 0x0 +#define BALLOC_PTE_SIZE_SMALL 0x1 +#define BALLOC_PTE_SIZE_BIG 0x2 + int pte_size; +}; + +#define __buddy_flag_ops(flag, flag_up) \ + static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \ + { \ + return b->flags & BALLOC_BUDDY_ ## flag_up; \ + } \ + static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \ + { \ + b->flags |= BALLOC_BUDDY_ ## flag_up; \ + } \ + static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \ + { \ + b->flags &= ~BALLOC_BUDDY_ ## flag_up; \ + } + +/* + * int buddy_is_alloced(struct gk20a_buddy *b); + * void buddy_set_alloced(struct gk20a_buddy *b); + * void buddy_clr_alloced(struct gk20a_buddy *b); + * + * int buddy_is_split(struct gk20a_buddy *b); + * void buddy_set_split(struct gk20a_buddy *b); + * void buddy_clr_split(struct gk20a_buddy *b); + * + * int buddy_is_in_list(struct gk20a_buddy *b); + * void buddy_set_in_list(struct gk20a_buddy *b); + * void buddy_clr_in_list(struct gk20a_buddy *b); + */ +__buddy_flag_ops(alloced, ALLOCED); +__buddy_flag_ops(split, SPLIT); +__buddy_flag_ops(in_list, IN_LIST); + +/* + * Keeps info for a fixed allocation. + */ +struct gk20a_fixed_alloc { + struct list_head buddies; /* List of buddies. */ + struct rb_node alloced_entry; /* RB tree of fixed allocations. */ + + u64 start; /* Start of fixed block. */ + u64 end; /* End address. */ +}; + +/* + * GPU buddy allocator for the various GPU address spaces. Each addressable unit + * doesn't have to correspond to a byte. In some cases each unit is a more + * complex object such as a comp_tag line or the like. + * + * The max order is computed based on the size of the minimum order and the size + * of the address space. + * + * order_size is the size of an order 0 buddy. + */ +struct gk20a_buddy_allocator { + struct gk20a_allocator *owner; /* Owner of this buddy allocator. */ + struct vm_gk20a *vm; /* Parent VM - can be NULL. */ + + u64 base; /* Base address of the space. 
*/ + u64 length; /* Length of the space. */ + u64 blk_size; /* Size of order 0 allocation. */ + u64 blk_shift; /* Shift to divide by blk_size. */ + + /* Internal stuff. */ + u64 start; /* Real start (aligned to blk_size). */ + u64 end; /* Real end, trimmed if needed. */ + u64 count; /* Count of objects in space. */ + u64 blks; /* Count of blks in the space. */ + u64 max_order; /* Specific maximum order. */ + + struct rb_root alloced_buddies; /* Outstanding allocations. */ + struct rb_root fixed_allocs; /* Outstanding fixed allocations. */ + + /* + * Impose an upper bound on the maximum order. + */ +#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1) + + struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN]; + u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN]; + u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN]; + u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN]; + + /* + * This is for when the allocator is managing a GVA space (the + * GPU_BALLOC_GVA_SPACE bit is set in @flags). This requires + * that we group like sized allocations into PDE blocks. 
+ */ + u64 pte_blk_order; + + int initialized; + + u64 flags; + + u64 bytes_alloced; + u64 bytes_alloced_real; + u64 bytes_freed; +}; + +static inline struct gk20a_buddy_allocator *buddy_allocator( + struct gk20a_allocator *a) +{ + return (struct gk20a_buddy_allocator *)(a)->priv; +} + +static inline struct list_head *balloc_get_order_list( + struct gk20a_buddy_allocator *a, int order) +{ + return &a->buddy_list[order]; +} + +static inline u64 balloc_order_to_len(struct gk20a_buddy_allocator *a, + int order) +{ + return (1 << order) * a->blk_size; +} + +static inline u64 balloc_base_shift(struct gk20a_buddy_allocator *a, + u64 base) +{ + return base - a->start; +} + +static inline u64 balloc_base_unshift(struct gk20a_buddy_allocator *a, + u64 base) +{ + return base + a->start; +} + +static inline struct gk20a_allocator *balloc_owner( + struct gk20a_buddy_allocator *a) +{ + return a->owner; +} + +#endif diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c index f2164768..b7e9a5e4 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c @@ -19,125 +19,13 @@ #include #include +#include "mm_gk20a.h" #include "platform_gk20a.h" #include "gk20a_allocator.h" -#include "mm_gk20a.h" - -static struct dentry *gk20a_alloc_debugfs_root; - -static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ - u32 gk20a_alloc_tracing_on; -#define gk20a_alloc_trace_func() \ - do { \ - if (gk20a_alloc_tracing_on) \ - trace_printk("%s\n", __func__); \ - } while (0) - -#define gk20a_alloc_trace_func_done() \ - do { \ - if (gk20a_alloc_tracing_on) \ - trace_printk("%s_done\n", __func__); \ - } while (0) - -/* - * Buddy allocator implementation. 
- */ -static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len); -static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr); -static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a, - u64 base, u64 len); -static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a); -static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a); -static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a); -static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a); - -static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a); -static void gk20a_buddy_print_stats(struct gk20a_allocator *__a, - struct seq_file *s, int lock); - -/* Some other buddy allocator functions. */ -static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, - u64 addr); -static void balloc_coalesce(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b); -static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, - struct gk20a_fixed_alloc *falloc); - -/* Debugging. */ -static void gk20a_init_alloc_debug(struct gk20a_allocator *a); - -/* - * This function is not present in older kernel's list.h code. - */ -#ifndef list_last_entry -#define list_last_entry(ptr, type, member) \ - list_entry((ptr)->prev, type, member) -#endif - -static const struct gk20a_allocator_ops buddy_ops = { - .alloc = gk20a_buddy_alloc, - .free = gk20a_buddy_free, - - .alloc_fixed = gk20a_buddy_alloc_fixed, - /* .free_fixed not needed. */ - - .base = gk20a_buddy_alloc_base, - .length = gk20a_buddy_alloc_length, - .end = gk20a_buddy_alloc_end, - .inited = gk20a_buddy_alloc_inited, - - .fini = gk20a_buddy_allocator_destroy, - - .print_stats = gk20a_buddy_print_stats, -}; - -/* - * GPU buddy allocator for various address spaces. - * - * Current limitations: - * o A fixed allocation could potentially be made that borders PDEs with - * different PTE sizes. This would require that fixed buffer to have - * different sized PTEs for different parts of the allocation. 
Probably - * best to just require PDE alignment for fixed address allocs. - * - * o It is currently possible to make an allocator that has a buddy alignment - * out of sync with the PDE block size alignment. A simple example is a - * 32GB address space starting at byte 1. Every buddy is shifted off by 1 - * which means each buddy corresponf to more than one actual GPU page. The - * best way to fix this is probably just require PDE blocksize alignment - * for the start of the address space. At the moment all allocators are - * easily PDE aligned so this hasn't been a problem. - */ - -static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a) -{ - struct gk20a_buddy_allocator *ba = a->priv; - - return ba->length; -} - -static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a) -{ - struct gk20a_buddy_allocator *ba = a->priv; - - return ba->start; -} - -static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a) -{ - struct gk20a_buddy_allocator *ba = a->priv; - - return ba->inited; -} -static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a) -{ - struct gk20a_buddy_allocator *ba = a->priv; - - return ba->end; -} +static struct dentry *gk20a_alloc_debugfs_root; u64 gk20a_alloc_length(struct gk20a_allocator *a) { @@ -194,190 +82,12 @@ void gk20a_alloc_destroy(struct gk20a_allocator *a) memset(a, 0, sizeof(*a)); } -/* - * Pick a suitable maximum order for this allocator. - * - * Hueristic: Just guessing that the best max order is the largest single - * block that will fit in the address space. 
- */ -static void balloc_compute_max_order(struct gk20a_buddy_allocator *a) -{ - u64 true_max_order = ilog2(a->blks); - - if (a->max_order == 0) { - a->max_order = true_max_order; - return; - } - - if (a->max_order > true_max_order) - a->max_order = true_max_order; - if (a->max_order > GPU_BALLOC_MAX_ORDER) - a->max_order = GPU_BALLOC_MAX_ORDER; -} - -/* - * Since we can only allocate in chucks of a->blk_size we need to trim off - * any excess data that is not aligned to a->blk_size. - */ -static void balloc_allocator_align(struct gk20a_buddy_allocator *a) -{ - a->start = ALIGN(a->base, a->blk_size); - WARN_ON(a->start != a->base); - a->end = (a->base + a->length) & ~(a->blk_size - 1); - a->count = a->end - a->start; - a->blks = a->count >> a->blk_shift; -} - -/* - * Pass NULL for parent if you want a top level buddy. - */ -static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *parent, - u64 start, u64 order) -{ - struct gk20a_buddy *new_buddy; - - new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL); - if (!new_buddy) - return NULL; - - memset(new_buddy, 0, sizeof(struct gk20a_buddy)); - - new_buddy->parent = parent; - new_buddy->start = start; - new_buddy->order = order; - new_buddy->end = start + (1 << order) * a->blk_size; - - return new_buddy; -} - -static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b, - struct list_head *list) -{ - if (buddy_is_in_list(b)) { - alloc_dbg(balloc_owner(a), - "Oops: adding added buddy (%llu:0x%llx)\n", - b->order, b->start); - BUG(); - } - - /* - * Add big PTE blocks to the tail, small to the head for GVA spaces. - * This lets the code that checks if there are available blocks check - * without cycling through the entire list. 
- */ - if (a->flags & GPU_BALLOC_GVA_SPACE && - b->pte_size == BALLOC_PTE_SIZE_BIG) - list_add_tail(&b->buddy_entry, list); - else - list_add(&b->buddy_entry, list); - - buddy_set_in_list(b); -} - -static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b) -{ - if (!buddy_is_in_list(b)) { - alloc_dbg(balloc_owner(a), - "Oops: removing removed buddy (%llu:0x%llx)\n", - b->order, b->start); - BUG(); - } - - list_del_init(&b->buddy_entry); - buddy_clr_in_list(b); -} - -/* - * Add a buddy to one of the buddy lists and deal with the necessary - * book keeping. Adds the buddy to the list specified by the buddy's order. - */ -static void balloc_blist_add(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b) -{ - __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); - a->buddy_list_len[b->order]++; -} - -static void balloc_blist_rem(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b) -{ - __balloc_buddy_list_rem(a, b); - a->buddy_list_len[b->order]--; -} - -static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len) -{ - if (len == 0) - return 0; - - len--; - len >>= a->blk_shift; - - return fls(len); -} - -static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a, - u64 start, u64 end) -{ - u64 size = (end - start) >> a->blk_shift; - - if (size > 0) - return min_t(u64, ilog2(size), a->max_order); - else - return GPU_BALLOC_MAX_ORDER; -} - -/* - * Initialize the buddy lists. - */ -static int balloc_init_lists(struct gk20a_buddy_allocator *a) -{ - int i; - u64 bstart, bend, order; - struct gk20a_buddy *buddy; - - bstart = a->start; - bend = a->end; - - /* First make sure the LLs are valid. 
*/ - for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) - INIT_LIST_HEAD(balloc_get_order_list(a, i)); - - while (bstart < bend) { - order = __balloc_max_order_in(a, bstart, bend); - - buddy = balloc_new_buddy(a, NULL, bstart, order); - if (!buddy) - goto cleanup; - - balloc_blist_add(a, buddy); - bstart += balloc_order_to_len(a, order); - } - - return 0; - -cleanup: - for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { - if (!list_empty(balloc_get_order_list(a, i))) { - buddy = list_first_entry(balloc_get_order_list(a, i), - struct gk20a_buddy, buddy_entry); - balloc_blist_rem(a, buddy); - kmem_cache_free(buddy_cache, buddy); - } - } - - return -ENOMEM; -} - /* * Handle the common init stuff for a gk20a_allocator. */ -static int __gk20a_alloc_common_init(struct gk20a_allocator *a, - const char *name, void *priv, - const struct gk20a_allocator_ops *ops) +int __gk20a_alloc_common_init(struct gk20a_allocator *a, + const char *name, void *priv, + const struct gk20a_allocator_ops *ops) { if (!ops) return -EINVAL; @@ -392,909 +102,6 @@ static int __gk20a_alloc_common_init(struct gk20a_allocator *a, return 0; } -/* - * Initialize a buddy allocator. Returns 0 on success. This allocator does - * not necessarily manage bytes. It manages distinct ranges of resources. This - * allows the allocator to work for things like comp_tags, semaphores, etc. - * - * @allocator: Ptr to an allocator struct to init. - * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to - * get PTE size for GVA spaces. - * @name: Name of the allocator. Doesn't have to be static storage. - * @base: The base address of the resource pool being managed. - * @size: Number of resources in the pool. - * @blk_size: Minimum number of resources to allocate at once. For things like - * semaphores this is 1. For GVA this might be as much as 64k. This - * corresponds to order 0. Must be power of 2. - * @max_order: Pick a maximum order. 
If you leave this as 0, the buddy allocator - * will try and pick a reasonable max order. - * @flags: Extra flags necessary. See GPU_BALLOC_*. - */ -int __gk20a_buddy_allocator_init(struct gk20a_allocator *__a, - struct vm_gk20a *vm, const char *name, - u64 base, u64 size, u64 blk_size, - u64 max_order, u64 flags) -{ - int err; - struct gk20a_buddy_allocator *a; - - /* blk_size must be greater than 0 and a power of 2. */ - if (blk_size == 0) - return -EINVAL; - if (blk_size & (blk_size - 1)) - return -EINVAL; - - if (max_order > GPU_BALLOC_MAX_ORDER) - return -EINVAL; - - /* If this is to manage a GVA space we need a VM. */ - if (flags & GPU_BALLOC_GVA_SPACE && !vm) - return -EINVAL; - - a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL); - if (!a) - return -ENOMEM; - - err = __gk20a_alloc_common_init(__a, name, a, &buddy_ops); - if (err) - goto fail; - - a->base = base; - a->length = size; - a->blk_size = blk_size; - a->blk_shift = __ffs(blk_size); - a->owner = __a; - - /* - * If base is 0 then modfy base to be the size of one block so that we - * can return errors by returning addr == 0. - */ - if (a->base == 0) { - a->base = a->blk_size; - a->length -= a->blk_size; - } - - a->vm = vm; - if (flags & GPU_BALLOC_GVA_SPACE) - a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10); - - a->flags = flags; - a->max_order = max_order; - - balloc_allocator_align(a); - balloc_compute_max_order(a); - - /* Shared buddy kmem_cache for all allocators. 
*/ - if (!buddy_cache) - buddy_cache = KMEM_CACHE(gk20a_buddy, 0); - if (!buddy_cache) { - err = -ENOMEM; - goto fail; - } - - a->alloced_buddies = RB_ROOT; - a->fixed_allocs = RB_ROOT; - err = balloc_init_lists(a); - if (err) - goto fail; - - a->inited = 1; - - gk20a_init_alloc_debug(__a); - alloc_dbg(__a, "New allocator: base 0x%llx\n", a->base); - alloc_dbg(__a, " size 0x%llx\n", a->length); - alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); - alloc_dbg(__a, " max_order %llu\n", a->max_order); - alloc_dbg(__a, " flags 0x%llx\n", a->flags); - - return 0; - -fail: - kfree(a); - return err; -} - -int gk20a_buddy_allocator_init(struct gk20a_allocator *a, const char *name, - u64 base, u64 size, u64 blk_size, u64 flags) -{ - return __gk20a_buddy_allocator_init(a, NULL, name, - base, size, blk_size, 0, 0); -} - -/* - * Clean up and destroy the passed allocator. - */ -static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a) -{ - int i; - struct rb_node *node; - struct gk20a_buddy *bud; - struct gk20a_fixed_alloc *falloc; - struct gk20a_buddy_allocator *a = __a->priv; - - alloc_lock(__a); - - if (!IS_ERR_OR_NULL(__a->debugfs_entry)) - debugfs_remove(__a->debugfs_entry); - - /* - * Free the fixed allocs first. - */ - while ((node = rb_first(&a->fixed_allocs)) != NULL) { - falloc = container_of(node, - struct gk20a_fixed_alloc, alloced_entry); - - rb_erase(node, &a->fixed_allocs); - __balloc_do_free_fixed(a, falloc); - } - - /* - * And now free all outstanding allocations. - */ - while ((node = rb_first(&a->alloced_buddies)) != NULL) { - bud = container_of(node, struct gk20a_buddy, alloced_entry); - balloc_free_buddy(a, bud->start); - balloc_blist_add(a, bud); - balloc_coalesce(a, bud); - } - - /* - * Now clean up the unallocated buddies. 
- */ - for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { - BUG_ON(a->buddy_list_alloced[i] != 0); - - while (!list_empty(balloc_get_order_list(a, i))) { - bud = list_first_entry(balloc_get_order_list(a, i), - struct gk20a_buddy, buddy_entry); - balloc_blist_rem(a, bud); - kmem_cache_free(buddy_cache, bud); - } - - if (a->buddy_list_len[i] != 0) { - pr_info("Excess buddies!!! (%d: %llu)\n", - i, a->buddy_list_len[i]); - BUG(); - } - if (a->buddy_list_split[i] != 0) { - pr_info("Excess split nodes!!! (%d: %llu)\n", - i, a->buddy_list_split[i]); - BUG(); - } - if (a->buddy_list_alloced[i] != 0) { - pr_info("Excess alloced nodes!!! (%d: %llu)\n", - i, a->buddy_list_alloced[i]); - BUG(); - } - } - - kfree(a); - - alloc_unlock(__a); -} - -/* - * Combine the passed buddy if possible. The pointer in @b may not be valid - * after this as the buddy may be freed. - * - * @a must be locked. - */ -static void balloc_coalesce(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b) -{ - struct gk20a_buddy *parent; - - if (buddy_is_alloced(b) || buddy_is_split(b)) - return; - - /* - * If both our buddy and I are both not allocated and not split then - * we can coalesce ourselves. - */ - if (!b->buddy) - return; - if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy)) - return; - - parent = b->parent; - - balloc_blist_rem(a, b); - balloc_blist_rem(a, b->buddy); - - buddy_clr_split(parent); - a->buddy_list_split[parent->order]--; - balloc_blist_add(a, parent); - - /* - * Recursively coalesce as far as we can go. - */ - balloc_coalesce(a, parent); - - /* Clean up the remains. */ - kmem_cache_free(buddy_cache, b->buddy); - kmem_cache_free(buddy_cache, b); -} - -/* - * Split a buddy into two new buddies who are 1/2 the size of the parent buddy. - * - * @a must be locked. 
- */ -static int balloc_split_buddy(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b, int pte_size) -{ - struct gk20a_buddy *left, *right; - u64 half; - - left = balloc_new_buddy(a, b, b->start, b->order - 1); - if (!left) - return -ENOMEM; - - half = (b->end - b->start) / 2; - - right = balloc_new_buddy(a, b, b->start + half, b->order - 1); - if (!right) { - kmem_cache_free(buddy_cache, left); - return -ENOMEM; - } - - buddy_set_split(b); - a->buddy_list_split[b->order]++; - - b->left = left; - b->right = right; - left->buddy = right; - right->buddy = left; - left->parent = b; - right->parent = b; - - /* PTE considerations. */ - if (a->flags & GPU_BALLOC_GVA_SPACE && - left->order <= a->pte_blk_order) { - left->pte_size = pte_size; - right->pte_size = pte_size; - } - - balloc_blist_rem(a, b); - balloc_blist_add(a, left); - balloc_blist_add(a, right); - - return 0; -} - -/* - * Place the passed buddy into the RB tree for allocated buddies. Never fails - * unless the passed entry is a duplicate which is a bug. - * - * @a must be locked. - */ -static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b) -{ - struct rb_node **new = &(a->alloced_buddies.rb_node); - struct rb_node *parent = NULL; - - while (*new) { - struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy, - alloced_entry); - - parent = *new; - if (b->start < bud->start) - new = &((*new)->rb_left); - else if (b->start > bud->start) - new = &((*new)->rb_right); - else - BUG_ON("Duplicate entries in allocated list!\n"); - } - - rb_link_node(&b->alloced_entry, parent, new); - rb_insert_color(&b->alloced_entry, &a->alloced_buddies); - - buddy_set_alloced(b); - a->buddy_list_alloced[b->order]++; -} - -/* - * Remove the passed buddy from the allocated buddy RB tree. Returns the - * deallocated buddy for further processing. - * - * @a must be locked. 
- */ -static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, - u64 addr) -{ - struct rb_node *node = a->alloced_buddies.rb_node; - struct gk20a_buddy *bud; - - while (node) { - bud = container_of(node, struct gk20a_buddy, alloced_entry); - - if (addr < bud->start) - node = node->rb_left; - else if (addr > bud->start) - node = node->rb_right; - else - break; - } - - if (!node) - return NULL; - - rb_erase(node, &a->alloced_buddies); - buddy_clr_alloced(bud); - a->buddy_list_alloced[bud->order]--; - - return bud; -} - -/* - * Find a suitable buddy for the given order and PTE type (big or little). - */ -static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a, - u64 order, int pte_size) -{ - struct gk20a_buddy *bud; - - if (order > a->max_order || - list_empty(balloc_get_order_list(a, order))) - return NULL; - - if (a->flags & GPU_BALLOC_GVA_SPACE && - pte_size == BALLOC_PTE_SIZE_BIG) - bud = list_last_entry(balloc_get_order_list(a, order), - struct gk20a_buddy, buddy_entry); - else - bud = list_first_entry(balloc_get_order_list(a, order), - struct gk20a_buddy, buddy_entry); - - if (bud->pte_size != BALLOC_PTE_SIZE_ANY && - bud->pte_size != pte_size) - return NULL; - - return bud; -} - -/* - * Allocate a suitably sized buddy. If no suitable buddy exists split higher - * order buddies until we have a suitable buddy to allocate. - * - * For PDE grouping add an extra check to see if a buddy is suitable: that the - * buddy exists in a PDE who's PTE size is reasonable - * - * @a must be locked. - */ -static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a, - u64 order, int pte_size) -{ - u64 split_order; - struct gk20a_buddy *bud = NULL; - - split_order = order; - while (split_order <= a->max_order && - !(bud = __balloc_find_buddy(a, split_order, pte_size))) - split_order++; - - /* Out of memory! */ - if (!bud) - return 0; - - while (bud->order != order) { - if (balloc_split_buddy(a, bud, pte_size)) - return 0; /* No mem... 
*/ - bud = bud->left; - } - - balloc_blist_rem(a, bud); - balloc_alloc_buddy(a, bud); - - return bud->start; -} - -/* - * Allocate memory from the passed allocator. - */ -static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len) -{ - u64 order, addr; - int pte_size; - struct gk20a_buddy_allocator *a = __a->priv; - - gk20a_alloc_trace_func(); - - alloc_lock(__a); - - order = balloc_get_order(a, len); - - if (order > a->max_order) { - alloc_unlock(__a); - alloc_dbg(balloc_owner(a), "Alloc fail\n"); - gk20a_alloc_trace_func_done(); - return 0; - } - - /* - * For now pass the base address of the allocator's region to - * __get_pte_size(). This ensures we get the right page size for - * the alloc but we don't have to know what the real address is - * going to be quite yet. - * - * TODO: once userspace supports a unified address space pass 0 for - * the base. This will make only 'len' affect the PTE size. - */ - if (a->flags & GPU_BALLOC_GVA_SPACE) - pte_size = __get_pte_size(a->vm, a->base, len); - else - pte_size = BALLOC_PTE_SIZE_ANY; - - addr = __balloc_do_alloc(a, order, pte_size); - - if (addr) { - a->bytes_alloced += len; - a->bytes_alloced_real += balloc_order_to_len(a, order); - alloc_dbg(balloc_owner(a), - "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", - addr, order, len, - pte_size == gmmu_page_size_big ? "big" : - pte_size == gmmu_page_size_small ? "small" : - "NA/any"); - } else { - alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n"); - } - - alloc_unlock(__a); - - gk20a_alloc_trace_func_done(); - return addr; -} - -/* - * See if the passed range is actually available for allocation. If so, then - * return 1, otherwise return 0. - * - * TODO: Right now this uses the unoptimal approach of going through all - * outstanding allocations and checking their base/ends. This could be better. 
- */ -static int balloc_is_range_free(struct gk20a_buddy_allocator *a, - u64 base, u64 end) -{ - struct rb_node *node; - struct gk20a_buddy *bud; - - node = rb_first(&a->alloced_buddies); - if (!node) - return 1; /* No allocs yet. */ - - bud = container_of(node, struct gk20a_buddy, alloced_entry); - - while (bud->start < end) { - if ((bud->start > base && bud->start < end) || - (bud->end > base && bud->end < end)) - return 0; - - node = rb_next(node); - if (!node) - break; - bud = container_of(node, struct gk20a_buddy, alloced_entry); - } - - return 1; -} - -static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a, - struct gk20a_fixed_alloc *f) -{ - struct rb_node **new = &(a->fixed_allocs.rb_node); - struct rb_node *parent = NULL; - - while (*new) { - struct gk20a_fixed_alloc *falloc = - container_of(*new, struct gk20a_fixed_alloc, - alloced_entry); - - parent = *new; - if (f->start < falloc->start) - new = &((*new)->rb_left); - else if (f->start > falloc->start) - new = &((*new)->rb_right); - else - BUG_ON("Duplicate entries in allocated list!\n"); - } - - rb_link_node(&f->alloced_entry, parent, new); - rb_insert_color(&f->alloced_entry, &a->fixed_allocs); -} - -/* - * Remove the passed buddy from the allocated buddy RB tree. Returns the - * deallocated buddy for further processing. - * - * @a must be locked. - */ -static struct gk20a_fixed_alloc *balloc_free_fixed( - struct gk20a_buddy_allocator *a, u64 addr) -{ - struct rb_node *node = a->fixed_allocs.rb_node; - struct gk20a_fixed_alloc *falloc; - - while (node) { - falloc = container_of(node, - struct gk20a_fixed_alloc, alloced_entry); - - if (addr < falloc->start) - node = node->rb_left; - else if (addr > falloc->start) - node = node->rb_right; - else - break; - } - - if (!node) - return NULL; - - rb_erase(node, &a->fixed_allocs); - - return falloc; -} - -/* - * Find the parent range - doesn't necessarily need the parent to actually exist - * as a buddy. Finding an existing parent comes later... 
- */ -static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a, - u64 base, u64 order, - u64 *pbase, u64 *porder) -{ - u64 base_mask; - u64 shifted_base = balloc_base_shift(a, base); - - order++; - base_mask = ~((a->blk_size << order) - 1); - - shifted_base &= base_mask; - - *pbase = balloc_base_unshift(a, shifted_base); - *porder = order; -} - -/* - * Makes a buddy at the passed address. This will make all parent buddies - * necessary for this buddy to exist as well. - */ -static struct gk20a_buddy *__balloc_make_fixed_buddy( - struct gk20a_buddy_allocator *a, u64 base, u64 order) -{ - struct gk20a_buddy *bud = NULL; - struct list_head *order_list; - u64 cur_order = order, cur_base = base; - - /* - * Algo: - * 1. Keep jumping up a buddy order until we find the real buddy that - * this buddy exists in. - * 2. Then work our way down through the buddy tree until we hit a dead - * end. - * 3. Start splitting buddies until we split to the one we need to - * make. - */ - while (cur_order <= a->max_order) { - int found = 0; - - order_list = balloc_get_order_list(a, cur_order); - list_for_each_entry(bud, order_list, buddy_entry) { - if (bud->start == cur_base) { - found = 1; - break; - } - } - - if (found) - break; - - __balloc_get_parent_range(a, cur_base, cur_order, - &cur_base, &cur_order); - } - - if (cur_order > a->max_order) { - alloc_dbg(balloc_owner(a), "No buddy for range ???\n"); - return NULL; - } - - /* Split this buddy as necessary until we get the target buddy. 
*/ - while (bud->start != base || bud->order != order) { - if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { - balloc_coalesce(a, bud); - return NULL; - } - - if (base < bud->right->start) - bud = bud->left; - else - bud = bud->right; - - } - - return bud; -} - -static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a, - struct gk20a_fixed_alloc *falloc, - u64 base, u64 len) -{ - u64 shifted_base, inc_base; - u64 align_order; - - shifted_base = balloc_base_shift(a, base); - if (shifted_base == 0) - align_order = __fls(len >> a->blk_shift); - else - align_order = min_t(u64, - __ffs(shifted_base >> a->blk_shift), - __fls(len >> a->blk_shift)); - - if (align_order > a->max_order) { - alloc_dbg(balloc_owner(a), - "Align order too big: %llu > %llu\n", - align_order, a->max_order); - return 0; - } - - /* - * Generate a list of buddies that satisfy this allocation. - */ - inc_base = shifted_base; - while (inc_base < (shifted_base + len)) { - u64 order_len = balloc_order_to_len(a, align_order); - u64 remaining; - struct gk20a_buddy *bud; - - bud = __balloc_make_fixed_buddy(a, - balloc_base_unshift(a, inc_base), - align_order); - if (!bud) { - alloc_dbg(balloc_owner(a), - "Fixed buddy failed: {0x%llx, %llu}!\n", - balloc_base_unshift(a, inc_base), - align_order); - goto err_and_cleanup; - } - - balloc_blist_rem(a, bud); - balloc_alloc_buddy(a, bud); - __balloc_buddy_list_add(a, bud, &falloc->buddies); - - /* Book keeping. */ - inc_base += order_len; - remaining = (shifted_base + len) - inc_base; - align_order = __ffs(inc_base >> a->blk_shift); - - /* If we don't have much left - trim down align_order. 
*/ - if (balloc_order_to_len(a, align_order) > remaining) - align_order = __balloc_max_order_in(a, inc_base, - inc_base + remaining); - } - - return base; - -err_and_cleanup: - while (!list_empty(&falloc->buddies)) { - struct gk20a_buddy *bud = list_first_entry(&falloc->buddies, - struct gk20a_buddy, - buddy_entry); - - __balloc_buddy_list_rem(a, bud); - balloc_free_buddy(a, bud->start); - kmem_cache_free(buddy_cache, bud); - } - - return 0; -} - -/* - * Allocate a fixed address allocation. The address of the allocation is @base - * and the length is @len. This is not a typical buddy allocator operation and - * as such has a high posibility of failure if the address space is heavily in - * use. - * - * Please do not use this function unless _absolutely_ necessary. - */ -static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a, - u64 base, u64 len) -{ - u64 ret, real_bytes = 0; - struct gk20a_buddy *bud; - struct gk20a_fixed_alloc *falloc = NULL; - struct gk20a_buddy_allocator *a = __a->priv; - - gk20a_alloc_trace_func(); - - /* If base isn't aligned to an order 0 block, fail. */ - if (base & (a->blk_size - 1)) - goto fail; - - if (len == 0) - goto fail; - - falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); - if (!falloc) - goto fail; - - INIT_LIST_HEAD(&falloc->buddies); - falloc->start = base; - falloc->end = base + len; - - alloc_lock(__a); - if (!balloc_is_range_free(a, base, base + len)) { - alloc_dbg(balloc_owner(a), - "Range not free: 0x%llx -> 0x%llx\n", - base, base + len); - goto fail_unlock; - } - - ret = __balloc_do_alloc_fixed(a, falloc, base, len); - if (!ret) { - alloc_dbg(balloc_owner(a), - "Alloc-fixed failed ?? 
0x%llx -> 0x%llx\n", - base, base + len); - goto fail_unlock; - } - - balloc_alloc_fixed(a, falloc); - - list_for_each_entry(bud, &falloc->buddies, buddy_entry) - real_bytes += (bud->end - bud->start); - - a->bytes_alloced += len; - a->bytes_alloced_real += real_bytes; - - alloc_unlock(__a); - alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base); - - gk20a_alloc_trace_func_done(); - return base; - -fail_unlock: - alloc_unlock(__a); -fail: - kfree(falloc); - gk20a_alloc_trace_func_done(); - return 0; -} - -static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, - struct gk20a_fixed_alloc *falloc) -{ - struct gk20a_buddy *bud; - - while (!list_empty(&falloc->buddies)) { - bud = list_first_entry(&falloc->buddies, - struct gk20a_buddy, - buddy_entry); - __balloc_buddy_list_rem(a, bud); - - balloc_free_buddy(a, bud->start); - balloc_blist_add(a, bud); - a->bytes_freed += balloc_order_to_len(a, bud->order); - - /* - * Attemp to defrag the allocation. - */ - balloc_coalesce(a, bud); - } - - kfree(falloc); -} - -/* - * Free the passed allocation. - */ -static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr) -{ - struct gk20a_buddy *bud; - struct gk20a_fixed_alloc *falloc; - struct gk20a_buddy_allocator *a = __a->priv; - - gk20a_alloc_trace_func(); - - if (!addr) { - gk20a_alloc_trace_func_done(); - return; - } - - alloc_lock(__a); - - /* - * First see if this is a fixed alloc. If not fall back to a regular - * buddy. - */ - falloc = balloc_free_fixed(a, addr); - if (falloc) { - __balloc_do_free_fixed(a, falloc); - goto done; - } - - bud = balloc_free_buddy(a, addr); - if (!bud) - goto done; - - balloc_blist_add(a, bud); - a->bytes_freed += balloc_order_to_len(a, bud->order); - - /* - * Attemp to defrag the allocation. - */ - balloc_coalesce(a, bud); - -done: - alloc_unlock(__a); - alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr); - gk20a_alloc_trace_func_done(); - return; -} - -/* - * Print the buddy allocator top level stats. 
If you pass @s as NULL then the - * stats are printed to the kernel log. This lets this code be used for - * debugging purposes internal to the allocator. - */ -static void gk20a_buddy_print_stats(struct gk20a_allocator *__a, - struct seq_file *s, int lock) -{ - int i; - struct rb_node *node; - struct gk20a_fixed_alloc *falloc; - struct gk20a_buddy_allocator *a = __a->priv; - - __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n", - a->base, a->length, a->blk_size); - __alloc_pstat(s, __a, "Internal params:\n"); - __alloc_pstat(s, __a, " start = 0x%llx\n", a->start); - __alloc_pstat(s, __a, " end = 0x%llx\n", a->end); - __alloc_pstat(s, __a, " count = 0x%llx\n", a->count); - __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks); - __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order); - - __alloc_pstat(s, __a, "Buddy blocks:\n"); - __alloc_pstat(s, __a, " Order Free Alloced Split\n"); - __alloc_pstat(s, __a, " ----- ---- ------- -----\n"); - - if (lock) - alloc_lock(__a); - for (i = a->max_order; i >= 0; i--) { - if (a->buddy_list_len[i] == 0 && - a->buddy_list_alloced[i] == 0 && - a->buddy_list_split[i] == 0) - continue; - - __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i, - a->buddy_list_len[i], - a->buddy_list_alloced[i], - a->buddy_list_split[i]); - } - - __alloc_pstat(s, __a, "\n"); - - for (node = rb_first(&a->fixed_allocs), i = 1; - node != NULL; - node = rb_next(node)) { - falloc = container_of(node, - struct gk20a_fixed_alloc, alloced_entry); - - __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", - i, falloc->start, falloc->end); - } - - __alloc_pstat(s, __a, "\n"); - __alloc_pstat(s, __a, "Bytes allocated: %llu\n", - a->bytes_alloced); - __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n", - a->bytes_alloced_real); - __alloc_pstat(s, __a, "Bytes freed: %llu\n", - a->bytes_freed); - - if (lock) - alloc_unlock(__a); -} - void gk20a_alloc_print_stats(struct gk20a_allocator *__a, struct seq_file *s, int lock) { @@ 
-1322,7 +129,7 @@ static const struct file_operations __alloc_fops = { .release = single_release, }; -static void gk20a_init_alloc_debug(struct gk20a_allocator *a) +void gk20a_init_alloc_debug(struct gk20a_allocator *a) { if (!gk20a_alloc_debugfs_root) return; @@ -1332,7 +139,15 @@ static void gk20a_init_alloc_debug(struct gk20a_allocator *a) a, &__alloc_fops); } -#ifdef CONFIG_DEBUG_FS +void gk20a_fini_alloc_debug(struct gk20a_allocator *a) +{ + if (!gk20a_alloc_debugfs_root) + return; + + if (!IS_ERR_OR_NULL(a->debugfs_entry)) + debugfs_remove(a->debugfs_entry); +} + void gk20a_alloc_debugfs_init(struct platform_device *pdev) { struct gk20a_platform *platform = platform_get_drvdata(pdev); @@ -1345,4 +160,3 @@ void gk20a_alloc_debugfs_init(struct platform_device *pdev) debugfs_create_u32("tracing", 0664, gk20a_alloc_debugfs_root, &gk20a_alloc_tracing_on); } -#endif diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h index 74e23e6c..06819700 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h @@ -17,8 +17,6 @@ #ifndef GK20A_ALLOCATOR_H #define GK20A_ALLOCATOR_H -#include -#include #include #include #include @@ -63,138 +61,6 @@ struct gk20a_allocator_ops { struct seq_file *s, int lock); }; -/* - * Each buddy is an element in a binary tree. - */ -struct gk20a_buddy { - struct gk20a_buddy *parent; /* Parent node. */ - struct gk20a_buddy *buddy; /* This node's buddy. */ - struct gk20a_buddy *left; /* Lower address sub-node. */ - struct gk20a_buddy *right; /* Higher address sub-node. */ - - struct list_head buddy_entry; /* List entry for various lists. */ - struct rb_node alloced_entry; /* RB tree of allocations. */ - - u64 start; /* Start address of this buddy. */ - u64 end; /* End address of this buddy. */ - u64 order; /* Buddy order. 
*/ - -#define BALLOC_BUDDY_ALLOCED 0x1 -#define BALLOC_BUDDY_SPLIT 0x2 -#define BALLOC_BUDDY_IN_LIST 0x4 - int flags; /* List of associated flags. */ - - /* - * Size of the PDE this buddy is using. This allows for grouping like - * sized allocations into the same PDE. - */ -#define BALLOC_PTE_SIZE_ANY 0x0 -#define BALLOC_PTE_SIZE_SMALL 0x1 -#define BALLOC_PTE_SIZE_BIG 0x2 - int pte_size; -}; - -#define __buddy_flag_ops(flag, flag_up) \ - static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \ - { \ - return b->flags & BALLOC_BUDDY_ ## flag_up; \ - } \ - static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \ - { \ - b->flags |= BALLOC_BUDDY_ ## flag_up; \ - } \ - static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \ - { \ - b->flags &= ~BALLOC_BUDDY_ ## flag_up; \ - } - -/* - * int buddy_is_alloced(struct gk20a_buddy *b); - * void buddy_set_alloced(struct gk20a_buddy *b); - * void buddy_clr_alloced(struct gk20a_buddy *b); - * - * int buddy_is_split(struct gk20a_buddy *b); - * void buddy_set_split(struct gk20a_buddy *b); - * void buddy_clr_split(struct gk20a_buddy *b); - * - * int buddy_is_in_list(struct gk20a_buddy *b); - * void buddy_set_in_list(struct gk20a_buddy *b); - * void buddy_clr_in_list(struct gk20a_buddy *b); - */ -__buddy_flag_ops(alloced, ALLOCED); -__buddy_flag_ops(split, SPLIT); -__buddy_flag_ops(in_list, IN_LIST); - -/* - * Keeps info for a fixed allocation. - */ -struct gk20a_fixed_alloc { - struct list_head buddies; /* List of buddies. */ - struct rb_node alloced_entry; /* RB tree of fixed allocations. */ - - u64 start; /* Start of fixed block. */ - u64 end; /* End address. */ -}; - -/* - * GPU buddy allocator for the various GPU address spaces. Each addressable unit - * doesn't have to correspond to a byte. In some cases each unit is a more - * complex object such as a comp_tag line or the like. - * - * The max order is computed based on the size of the minimum order and the size - * of the address space. 
- * - * order_size is the size of an order 0 buddy. - */ -struct gk20a_buddy_allocator { - struct gk20a_allocator *owner; /* Owner of this buddy allocator. */ - struct vm_gk20a *vm; /* Parent VM - can be NULL. */ - - u64 base; /* Base address of the space. */ - u64 length; /* Length of the space. */ - u64 blk_size; /* Size of order 0 allocation. */ - u64 blk_shift; /* Shift to divide by blk_size. */ - - int init; /* Non-zero if initialized. */ - - /* Internal stuff. */ - u64 start; /* Real start (aligned to blk_size). */ - u64 end; /* Real end, trimmed if needed. */ - u64 count; /* Count of objects in space. */ - u64 blks; /* Count of blks in the space. */ - u64 max_order; /* Specific maximum order. */ - - struct rb_root alloced_buddies; /* Outstanding allocations. */ - struct rb_root fixed_allocs; /* Outstanding fixed allocations. */ - - /* - * Impose an upper bound on the maximum order. - */ -#define GPU_BALLOC_MAX_ORDER 31 -#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1) - - struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN]; - u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN]; - u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN]; - u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN]; - - /* - * This is for when the allocator is managing a GVA space (the - * GPU_BALLOC_GVA_SPACE bit is set in @flags). This requires - * that we group like sized allocations into PDE blocks. - */ - u64 pte_blk_order; - - int inited; - -#define GPU_BALLOC_GVA_SPACE 0x1 - u64 flags; - - u64 bytes_alloced; - u64 bytes_alloced_real; - u64 bytes_freed; -}; - struct gk20a_allocator { char name[32]; struct mutex lock; @@ -205,6 +71,11 @@ struct gk20a_allocator { struct dentry *debugfs_entry; }; +/* + * Allocator flags. 
+ */ +#define GPU_BALLOC_GVA_SPACE 0x1 + static inline void alloc_lock(struct gk20a_allocator *a) { mutex_lock(&a->lock); @@ -215,42 +86,6 @@ static inline void alloc_unlock(struct gk20a_allocator *a) mutex_unlock(&a->lock); } -static inline struct gk20a_buddy_allocator *buddy_allocator( - struct gk20a_allocator *a) -{ - return (struct gk20a_buddy_allocator *)a->priv; -} - -static inline struct list_head *balloc_get_order_list( - struct gk20a_buddy_allocator *a, int order) -{ - return &a->buddy_list[order]; -} - -static inline u64 balloc_order_to_len(struct gk20a_buddy_allocator *a, - int order) -{ - return (1 << order) * a->blk_size; -} - -static inline u64 balloc_base_shift(struct gk20a_buddy_allocator *a, - u64 base) -{ - return base - a->start; -} - -static inline u64 balloc_base_unshift(struct gk20a_buddy_allocator *a, - u64 base) -{ - return base + a->start; -} - -static inline struct gk20a_allocator *balloc_owner( - struct gk20a_buddy_allocator *a) -{ - return a->owner; -} - /* * Buddy allocator specific initializers. */ @@ -262,6 +97,8 @@ int gk20a_buddy_allocator_init(struct gk20a_allocator *allocator, const char *name, u64 base, u64 size, u64 blk_size, u64 flags); +#define GPU_BALLOC_MAX_ORDER 31 + /* * Allocator APIs. */ @@ -281,11 +118,34 @@ void gk20a_alloc_destroy(struct gk20a_allocator *allocator); void gk20a_alloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, int lock); +/* + * Common functionality for the internals of the allocators. + */ +void gk20a_init_alloc_debug(struct gk20a_allocator *a); +void gk20a_fini_alloc_debug(struct gk20a_allocator *a); +int __gk20a_alloc_common_init(struct gk20a_allocator *a, + const char *name, void *priv, + const struct gk20a_allocator_ops *ops); + /* * Debug stuff. 
*/ +extern u32 gk20a_alloc_tracing_on; + void gk20a_alloc_debugfs_init(struct platform_device *pdev); +#define gk20a_alloc_trace_func() \ + do { \ + if (gk20a_alloc_tracing_on) \ + trace_printk("%s\n", __func__); \ + } while (0) + +#define gk20a_alloc_trace_func_done() \ + do { \ + if (gk20a_alloc_tracing_on) \ + trace_printk("%s_done\n", __func__); \ + } while (0) + #define __alloc_pstat(seq, allocator, fmt, arg...) \ do { \ if (s) \ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c new file mode 100644 index 00000000..4a1df339 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c @@ -0,0 +1,1187 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "mm_gk20a.h" +#include "platform_gk20a.h" +#include "gk20a_allocator.h" +#include "buddy_allocator_priv.h" + +static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ + +/* Some other buddy allocator functions. */ +static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, + u64 addr); +static void balloc_coalesce(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b); +static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, + struct gk20a_fixed_alloc *falloc); + +/* + * This function is not present in older kernel's list.h code. 
+ */
+#ifndef list_last_entry
+#define list_last_entry(ptr, type, member) \
+ list_entry((ptr)->prev, type, member)
+#endif
+
+/*
+ * GPU buddy allocator for various address spaces.
+ *
+ * Current limitations:
+ * o A fixed allocation could potentially be made that borders PDEs with
+ * different PTE sizes. This would require that fixed buffer to have
+ * different sized PTEs for different parts of the allocation. Probably
+ * best to just require PDE alignment for fixed address allocs.
+ *
+ * o It is currently possible to make an allocator that has a buddy alignment
+ * out of sync with the PDE block size alignment. A simple example is a
+ * 32GB address space starting at byte 1. Every buddy is shifted off by 1
+ * which means each buddy corresponds to more than one actual GPU page. The
+ * best way to fix this is probably just require PDE blocksize alignment
+ * for the start of the address space. At the moment all allocators are
+ * easily PDE aligned so this hasn't been a problem.
+ */
+
+/*
+ * Pick a suitable maximum order for this allocator.
+ *
+ * Heuristic: Just guessing that the best max order is the largest single
+ * block that will fit in the address space.
+ */
+static void balloc_compute_max_order(struct gk20a_buddy_allocator *a)
+{
+ u64 true_max_order = ilog2(a->blks);
+
+ if (a->max_order == 0) {
+ a->max_order = true_max_order;
+ return;
+ }
+
+ if (a->max_order > true_max_order)
+ a->max_order = true_max_order;
+ if (a->max_order > GPU_BALLOC_MAX_ORDER)
+ a->max_order = GPU_BALLOC_MAX_ORDER;
+}
+
+/*
+ * Since we can only allocate in chunks of a->blk_size we need to trim off
+ * any excess data that is not aligned to a->blk_size. 
+ */ +static void balloc_allocator_align(struct gk20a_buddy_allocator *a) +{ + a->start = ALIGN(a->base, a->blk_size); + WARN_ON(a->start != a->base); + a->end = (a->base + a->length) & ~(a->blk_size - 1); + a->count = a->end - a->start; + a->blks = a->count >> a->blk_shift; +} + +/* + * Pass NULL for parent if you want a top level buddy. + */ +static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *parent, + u64 start, u64 order) +{ + struct gk20a_buddy *new_buddy; + + new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL); + if (!new_buddy) + return NULL; + + memset(new_buddy, 0, sizeof(struct gk20a_buddy)); + + new_buddy->parent = parent; + new_buddy->start = start; + new_buddy->order = order; + new_buddy->end = start + (1 << order) * a->blk_size; + + return new_buddy; +} + +static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b, + struct list_head *list) +{ + if (buddy_is_in_list(b)) { + alloc_dbg(balloc_owner(a), + "Oops: adding added buddy (%llu:0x%llx)\n", + b->order, b->start); + BUG(); + } + + /* + * Add big PTE blocks to the tail, small to the head for GVA spaces. + * This lets the code that checks if there are available blocks check + * without cycling through the entire list. + */ + if (a->flags & GPU_BALLOC_GVA_SPACE && + b->pte_size == BALLOC_PTE_SIZE_BIG) + list_add_tail(&b->buddy_entry, list); + else + list_add(&b->buddy_entry, list); + + buddy_set_in_list(b); +} + +static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) +{ + if (!buddy_is_in_list(b)) { + alloc_dbg(balloc_owner(a), + "Oops: removing removed buddy (%llu:0x%llx)\n", + b->order, b->start); + BUG(); + } + + list_del_init(&b->buddy_entry); + buddy_clr_in_list(b); +} + +/* + * Add a buddy to one of the buddy lists and deal with the necessary + * book keeping. Adds the buddy to the list specified by the buddy's order. 
+ */ +static void balloc_blist_add(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) +{ + __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); + a->buddy_list_len[b->order]++; +} + +static void balloc_blist_rem(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) +{ + __balloc_buddy_list_rem(a, b); + a->buddy_list_len[b->order]--; +} + +static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len) +{ + if (len == 0) + return 0; + + len--; + len >>= a->blk_shift; + + return fls(len); +} + +static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a, + u64 start, u64 end) +{ + u64 size = (end - start) >> a->blk_shift; + + if (size > 0) + return min_t(u64, ilog2(size), a->max_order); + else + return GPU_BALLOC_MAX_ORDER; +} + +/* + * Initialize the buddy lists. + */ +static int balloc_init_lists(struct gk20a_buddy_allocator *a) +{ + int i; + u64 bstart, bend, order; + struct gk20a_buddy *buddy; + + bstart = a->start; + bend = a->end; + + /* First make sure the LLs are valid. */ + for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) + INIT_LIST_HEAD(balloc_get_order_list(a, i)); + + while (bstart < bend) { + order = __balloc_max_order_in(a, bstart, bend); + + buddy = balloc_new_buddy(a, NULL, bstart, order); + if (!buddy) + goto cleanup; + + balloc_blist_add(a, buddy); + bstart += balloc_order_to_len(a, order); + } + + return 0; + +cleanup: + for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { + if (!list_empty(balloc_get_order_list(a, i))) { + buddy = list_first_entry(balloc_get_order_list(a, i), + struct gk20a_buddy, buddy_entry); + balloc_blist_rem(a, buddy); + kmem_cache_free(buddy_cache, buddy); + } + } + + return -ENOMEM; +} + +/* + * Clean up and destroy the passed allocator. 
+ */ +static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a) +{ + int i; + struct rb_node *node; + struct gk20a_buddy *bud; + struct gk20a_fixed_alloc *falloc; + struct gk20a_buddy_allocator *a = __a->priv; + + alloc_lock(__a); + + gk20a_fini_alloc_debug(__a); + + /* + * Free the fixed allocs first. + */ + while ((node = rb_first(&a->fixed_allocs)) != NULL) { + falloc = container_of(node, + struct gk20a_fixed_alloc, alloced_entry); + + rb_erase(node, &a->fixed_allocs); + __balloc_do_free_fixed(a, falloc); + } + + /* + * And now free all outstanding allocations. + */ + while ((node = rb_first(&a->alloced_buddies)) != NULL) { + bud = container_of(node, struct gk20a_buddy, alloced_entry); + balloc_free_buddy(a, bud->start); + balloc_blist_add(a, bud); + balloc_coalesce(a, bud); + } + + /* + * Now clean up the unallocated buddies. + */ + for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { + BUG_ON(a->buddy_list_alloced[i] != 0); + + while (!list_empty(balloc_get_order_list(a, i))) { + bud = list_first_entry(balloc_get_order_list(a, i), + struct gk20a_buddy, buddy_entry); + balloc_blist_rem(a, bud); + kmem_cache_free(buddy_cache, bud); + } + + if (a->buddy_list_len[i] != 0) { + pr_info("Excess buddies!!! (%d: %llu)\n", + i, a->buddy_list_len[i]); + BUG(); + } + if (a->buddy_list_split[i] != 0) { + pr_info("Excess split nodes!!! (%d: %llu)\n", + i, a->buddy_list_split[i]); + BUG(); + } + if (a->buddy_list_alloced[i] != 0) { + pr_info("Excess alloced nodes!!! (%d: %llu)\n", + i, a->buddy_list_alloced[i]); + BUG(); + } + } + + kfree(a); + + alloc_unlock(__a); +} + +/* + * Combine the passed buddy if possible. The pointer in @b may not be valid + * after this as the buddy may be freed. + * + * @a must be locked. 
+ */ +static void balloc_coalesce(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) +{ + struct gk20a_buddy *parent; + + if (buddy_is_alloced(b) || buddy_is_split(b)) + return; + + /* + * If both our buddy and I are both not allocated and not split then + * we can coalesce ourselves. + */ + if (!b->buddy) + return; + if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy)) + return; + + parent = b->parent; + + balloc_blist_rem(a, b); + balloc_blist_rem(a, b->buddy); + + buddy_clr_split(parent); + a->buddy_list_split[parent->order]--; + balloc_blist_add(a, parent); + + /* + * Recursively coalesce as far as we can go. + */ + balloc_coalesce(a, parent); + + /* Clean up the remains. */ + kmem_cache_free(buddy_cache, b->buddy); + kmem_cache_free(buddy_cache, b); +} + +/* + * Split a buddy into two new buddies who are 1/2 the size of the parent buddy. + * + * @a must be locked. + */ +static int balloc_split_buddy(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b, int pte_size) +{ + struct gk20a_buddy *left, *right; + u64 half; + + left = balloc_new_buddy(a, b, b->start, b->order - 1); + if (!left) + return -ENOMEM; + + half = (b->end - b->start) / 2; + + right = balloc_new_buddy(a, b, b->start + half, b->order - 1); + if (!right) { + kmem_cache_free(buddy_cache, left); + return -ENOMEM; + } + + buddy_set_split(b); + a->buddy_list_split[b->order]++; + + b->left = left; + b->right = right; + left->buddy = right; + right->buddy = left; + left->parent = b; + right->parent = b; + + /* PTE considerations. */ + if (a->flags & GPU_BALLOC_GVA_SPACE && + left->order <= a->pte_blk_order) { + left->pte_size = pte_size; + right->pte_size = pte_size; + } + + balloc_blist_rem(a, b); + balloc_blist_add(a, left); + balloc_blist_add(a, right); + + return 0; +} + +/* + * Place the passed buddy into the RB tree for allocated buddies. Never fails + * unless the passed entry is a duplicate which is a bug. + * + * @a must be locked. 
+ */ +static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) +{ + struct rb_node **new = &(a->alloced_buddies.rb_node); + struct rb_node *parent = NULL; + + while (*new) { + struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy, + alloced_entry); + + parent = *new; + if (b->start < bud->start) + new = &((*new)->rb_left); + else if (b->start > bud->start) + new = &((*new)->rb_right); + else + BUG_ON("Duplicate entries in allocated list!\n"); + } + + rb_link_node(&b->alloced_entry, parent, new); + rb_insert_color(&b->alloced_entry, &a->alloced_buddies); + + buddy_set_alloced(b); + a->buddy_list_alloced[b->order]++; +} + +/* + * Remove the passed buddy from the allocated buddy RB tree. Returns the + * deallocated buddy for further processing. + * + * @a must be locked. + */ +static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, + u64 addr) +{ + struct rb_node *node = a->alloced_buddies.rb_node; + struct gk20a_buddy *bud; + + while (node) { + bud = container_of(node, struct gk20a_buddy, alloced_entry); + + if (addr < bud->start) + node = node->rb_left; + else if (addr > bud->start) + node = node->rb_right; + else + break; + } + + if (!node) + return NULL; + + rb_erase(node, &a->alloced_buddies); + buddy_clr_alloced(bud); + a->buddy_list_alloced[bud->order]--; + + return bud; +} + +/* + * Find a suitable buddy for the given order and PTE type (big or little). 
+ */ +static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a, + u64 order, int pte_size) +{ + struct gk20a_buddy *bud; + + if (order > a->max_order || + list_empty(balloc_get_order_list(a, order))) + return NULL; + + if (a->flags & GPU_BALLOC_GVA_SPACE && + pte_size == BALLOC_PTE_SIZE_BIG) + bud = list_last_entry(balloc_get_order_list(a, order), + struct gk20a_buddy, buddy_entry); + else + bud = list_first_entry(balloc_get_order_list(a, order), + struct gk20a_buddy, buddy_entry); + + if (bud->pte_size != BALLOC_PTE_SIZE_ANY && + bud->pte_size != pte_size) + return NULL; + + return bud; +} + +/* + * Allocate a suitably sized buddy. If no suitable buddy exists split higher + * order buddies until we have a suitable buddy to allocate. + * + * For PDE grouping add an extra check to see if a buddy is suitable: that the + * buddy exists in a PDE who's PTE size is reasonable + * + * @a must be locked. + */ +static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a, + u64 order, int pte_size) +{ + u64 split_order; + struct gk20a_buddy *bud = NULL; + + split_order = order; + while (split_order <= a->max_order && + !(bud = __balloc_find_buddy(a, split_order, pte_size))) + split_order++; + + /* Out of memory! */ + if (!bud) + return 0; + + while (bud->order != order) { + if (balloc_split_buddy(a, bud, pte_size)) + return 0; /* No mem... */ + bud = bud->left; + } + + balloc_blist_rem(a, bud); + balloc_alloc_buddy(a, bud); + + return bud->start; +} + +/* + * See if the passed range is actually available for allocation. If so, then + * return 1, otherwise return 0. + * + * TODO: Right now this uses the unoptimal approach of going through all + * outstanding allocations and checking their base/ends. This could be better. + */ +static int balloc_is_range_free(struct gk20a_buddy_allocator *a, + u64 base, u64 end) +{ + struct rb_node *node; + struct gk20a_buddy *bud; + + node = rb_first(&a->alloced_buddies); + if (!node) + return 1; /* No allocs yet. 
*/ + + bud = container_of(node, struct gk20a_buddy, alloced_entry); + + while (bud->start < end) { + if ((bud->start > base && bud->start < end) || + (bud->end > base && bud->end < end)) + return 0; + + node = rb_next(node); + if (!node) + break; + bud = container_of(node, struct gk20a_buddy, alloced_entry); + } + + return 1; +} + +static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a, + struct gk20a_fixed_alloc *f) +{ + struct rb_node **new = &(a->fixed_allocs.rb_node); + struct rb_node *parent = NULL; + + while (*new) { + struct gk20a_fixed_alloc *falloc = + container_of(*new, struct gk20a_fixed_alloc, + alloced_entry); + + BUG_ON(!virt_addr_valid(falloc)); + + parent = *new; + if (f->start < falloc->start) + new = &((*new)->rb_left); + else if (f->start > falloc->start) + new = &((*new)->rb_right); + else + BUG_ON("Duplicate entries in allocated list!\n"); + } + + rb_link_node(&f->alloced_entry, parent, new); + rb_insert_color(&f->alloced_entry, &a->fixed_allocs); +} + +/* + * Remove the passed buddy from the allocated buddy RB tree. Returns the + * deallocated buddy for further processing. + * + * @a must be locked. + */ +static struct gk20a_fixed_alloc *balloc_free_fixed( + struct gk20a_buddy_allocator *a, u64 addr) +{ + struct rb_node *node = a->fixed_allocs.rb_node; + struct gk20a_fixed_alloc *falloc; + + while (node) { + falloc = container_of(node, + struct gk20a_fixed_alloc, alloced_entry); + + if (addr < falloc->start) + node = node->rb_left; + else if (addr > falloc->start) + node = node->rb_right; + else + break; + } + + if (!node) + return NULL; + + rb_erase(node, &a->fixed_allocs); + + return falloc; +} + +/* + * Find the parent range - doesn't necessarily need the parent to actually exist + * as a buddy. Finding an existing parent comes later... 
+ */ +static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a, + u64 base, u64 order, + u64 *pbase, u64 *porder) +{ + u64 base_mask; + u64 shifted_base = balloc_base_shift(a, base); + + order++; + base_mask = ~((a->blk_size << order) - 1); + + shifted_base &= base_mask; + + *pbase = balloc_base_unshift(a, shifted_base); + *porder = order; +} + +/* + * Makes a buddy at the passed address. This will make all parent buddies + * necessary for this buddy to exist as well. + */ +static struct gk20a_buddy *__balloc_make_fixed_buddy( + struct gk20a_buddy_allocator *a, u64 base, u64 order) +{ + struct gk20a_buddy *bud = NULL; + struct list_head *order_list; + u64 cur_order = order, cur_base = base; + + /* + * Algo: + * 1. Keep jumping up a buddy order until we find the real buddy that + * this buddy exists in. + * 2. Then work our way down through the buddy tree until we hit a dead + * end. + * 3. Start splitting buddies until we split to the one we need to + * make. + */ + while (cur_order <= a->max_order) { + int found = 0; + + order_list = balloc_get_order_list(a, cur_order); + list_for_each_entry(bud, order_list, buddy_entry) { + if (bud->start == cur_base) { + found = 1; + break; + } + } + + if (found) + break; + + __balloc_get_parent_range(a, cur_base, cur_order, + &cur_base, &cur_order); + } + + if (cur_order > a->max_order) { + alloc_dbg(balloc_owner(a), "No buddy for range ???\n"); + return NULL; + } + + /* Split this buddy as necessary until we get the target buddy. 
*/ + while (bud->start != base || bud->order != order) { + if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { + balloc_coalesce(a, bud); + return NULL; + } + + if (base < bud->right->start) + bud = bud->left; + else + bud = bud->right; + + } + + return bud; +} + +static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a, + struct gk20a_fixed_alloc *falloc, + u64 base, u64 len) +{ + u64 shifted_base, inc_base; + u64 align_order; + + shifted_base = balloc_base_shift(a, base); + if (shifted_base == 0) + align_order = __fls(len >> a->blk_shift); + else + align_order = min_t(u64, + __ffs(shifted_base >> a->blk_shift), + __fls(len >> a->blk_shift)); + + if (align_order > a->max_order) { + alloc_dbg(balloc_owner(a), + "Align order too big: %llu > %llu\n", + align_order, a->max_order); + return 0; + } + + /* + * Generate a list of buddies that satisfy this allocation. + */ + inc_base = shifted_base; + while (inc_base < (shifted_base + len)) { + u64 order_len = balloc_order_to_len(a, align_order); + u64 remaining; + struct gk20a_buddy *bud; + + bud = __balloc_make_fixed_buddy(a, + balloc_base_unshift(a, inc_base), + align_order); + if (!bud) { + alloc_dbg(balloc_owner(a), + "Fixed buddy failed: {0x%llx, %llu}!\n", + balloc_base_unshift(a, inc_base), + align_order); + goto err_and_cleanup; + } + + balloc_blist_rem(a, bud); + balloc_alloc_buddy(a, bud); + __balloc_buddy_list_add(a, bud, &falloc->buddies); + + /* Book keeping. */ + inc_base += order_len; + remaining = (shifted_base + len) - inc_base; + align_order = __ffs(inc_base >> a->blk_shift); + + /* If we don't have much left - trim down align_order. 
*/
+ if (balloc_order_to_len(a, align_order) > remaining)
+ align_order = __balloc_max_order_in(a, inc_base,
+ inc_base + remaining);
+ }
+
+ return base;
+
+err_and_cleanup:
+ while (!list_empty(&falloc->buddies)) {
+ struct gk20a_buddy *bud = list_first_entry(&falloc->buddies,
+ struct gk20a_buddy,
+ buddy_entry);
+
+ __balloc_buddy_list_rem(a, bud);
+ balloc_free_buddy(a, bud->start);
+ kmem_cache_free(buddy_cache, bud);
+ }
+
+ return 0;
+}
+
+static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a,
+ struct gk20a_fixed_alloc *falloc)
+{
+ struct gk20a_buddy *bud;
+
+ while (!list_empty(&falloc->buddies)) {
+ bud = list_first_entry(&falloc->buddies,
+ struct gk20a_buddy,
+ buddy_entry);
+ __balloc_buddy_list_rem(a, bud);
+
+ balloc_free_buddy(a, bud->start);
+ balloc_blist_add(a, bud);
+ a->bytes_freed += balloc_order_to_len(a, bud->order);
+
+ /*
+ * Attempt to defrag the allocation.
+ */
+ balloc_coalesce(a, bud);
+ }
+
+ kfree(falloc);
+}
+
+/*
+ * Allocate memory from the passed allocator.
+ */
+static u64 gk20a_buddy_balloc(struct gk20a_allocator *__a, u64 len)
+{
+ u64 order, addr;
+ int pte_size;
+ struct gk20a_buddy_allocator *a = __a->priv;
+
+ gk20a_alloc_trace_func();
+
+ alloc_lock(__a);
+
+ order = balloc_get_order(a, len);
+
+ if (order > a->max_order) {
+ alloc_unlock(__a);
+ alloc_dbg(balloc_owner(a), "Alloc fail\n");
+ gk20a_alloc_trace_func_done();
+ return 0;
+ }
+
+ /*
+ * For now pass the base address of the allocator's region to
+ * __get_pte_size(). This ensures we get the right page size for
+ * the alloc but we don't have to know what the real address is
+ * going to be quite yet.
+ *
+ * TODO: once userspace supports a unified address space pass 0 for
+ * the base. This will make only 'len' affect the PTE size. 
+ */
+ if (a->flags & GPU_BALLOC_GVA_SPACE)
+ pte_size = __get_pte_size(a->vm, a->base, len);
+ else
+ pte_size = BALLOC_PTE_SIZE_ANY;
+
+ addr = __balloc_do_alloc(a, order, pte_size);
+
+ if (addr) {
+ a->bytes_alloced += len;
+ a->bytes_alloced_real += balloc_order_to_len(a, order);
+ alloc_dbg(balloc_owner(a),
+ "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
+ addr, order, len,
+ pte_size == gmmu_page_size_big ? "big" :
+ pte_size == gmmu_page_size_small ? "small" :
+ "NA/any");
+ } else {
+ alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n");
+ }
+
+ alloc_unlock(__a);
+
+ gk20a_alloc_trace_func_done();
+ return addr;
+}
+
+/*
+ * Allocate a fixed address allocation. The address of the allocation is @base
+ * and the length is @len. This is not a typical buddy allocator operation and
+ * as such has a high possibility of failure if the address space is heavily in
+ * use.
+ *
+ * Please do not use this function unless _absolutely_ necessary.
+ */
+static u64 gk20a_balloc_fixed_buddy(struct gk20a_allocator *__a,
+ u64 base, u64 len)
+{
+ u64 ret, real_bytes = 0;
+ struct gk20a_buddy *bud;
+ struct gk20a_fixed_alloc *falloc = NULL;
+ struct gk20a_buddy_allocator *a = __a->priv;
+
+ gk20a_alloc_trace_func();
+
+ /* If base isn't aligned to an order 0 block, fail. */
+ if (base & (a->blk_size - 1))
+ goto fail;
+
+ if (len == 0)
+ goto fail;
+
+ falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
+ if (!falloc)
+ goto fail;
+
+ INIT_LIST_HEAD(&falloc->buddies);
+ falloc->start = base;
+ falloc->end = base + len;
+
+ alloc_lock(__a);
+ if (!balloc_is_range_free(a, base, base + len)) {
+ alloc_dbg(balloc_owner(a),
+ "Range not free: 0x%llx -> 0x%llx\n",
+ base, base + len);
+ goto fail_unlock;
+ }
+
+ ret = __balloc_do_alloc_fixed(a, falloc, base, len);
+ if (!ret) {
+ alloc_dbg(balloc_owner(a),
+ "Alloc-fixed failed ?? 
0x%llx -> 0x%llx\n",
+ base, base + len);
+ goto fail_unlock;
+ }
+
+ balloc_alloc_fixed(a, falloc);
+
+ list_for_each_entry(bud, &falloc->buddies, buddy_entry)
+ real_bytes += (bud->end - bud->start);
+
+ a->bytes_alloced += len;
+ a->bytes_alloced_real += real_bytes;
+
+ alloc_unlock(__a);
+ alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base);
+
+ gk20a_alloc_trace_func_done();
+ return base;
+
+fail_unlock:
+ alloc_unlock(__a);
+fail:
+ kfree(falloc);
+ gk20a_alloc_trace_func_done();
+ return 0;
+}
+
+/*
+ * Free the passed allocation.
+ */
+static void gk20a_buddy_bfree(struct gk20a_allocator *__a, u64 addr)
+{
+ struct gk20a_buddy *bud;
+ struct gk20a_fixed_alloc *falloc;
+ struct gk20a_buddy_allocator *a = __a->priv;
+
+ gk20a_alloc_trace_func();
+
+ if (!addr) {
+ gk20a_alloc_trace_func_done();
+ return;
+ }
+
+ alloc_lock(__a);
+
+ /*
+ * First see if this is a fixed alloc. If not fall back to a regular
+ * buddy.
+ */
+ falloc = balloc_free_fixed(a, addr);
+ if (falloc) {
+ __balloc_do_free_fixed(a, falloc);
+ goto done;
+ }
+
+ bud = balloc_free_buddy(a, addr);
+ if (!bud)
+ goto done;
+
+ balloc_blist_add(a, bud);
+ a->bytes_freed += balloc_order_to_len(a, bud->order);
+
+ /*
+ * Attempt to defrag the allocation. 
+ */ + balloc_coalesce(a, bud); + +done: + alloc_unlock(__a); + alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr); + gk20a_alloc_trace_func_done(); + return; +} + +static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a) +{ + struct gk20a_buddy_allocator *ba = a->priv; + + return ba->length; +} + +static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a) +{ + struct gk20a_buddy_allocator *ba = a->priv; + + return ba->start; +} + +static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a) +{ + struct gk20a_buddy_allocator *ba = a->priv; + + return ba->initialized; +} + +static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a) +{ + struct gk20a_buddy_allocator *ba = a->priv; + + return ba->end; +} + +/* + * Print the buddy allocator top level stats. If you pass @s as NULL then the + * stats are printed to the kernel log. This lets this code be used for + * debugging purposes internal to the allocator. + */ +static void gk20a_buddy_print_stats(struct gk20a_allocator *__a, + struct seq_file *s, int lock) +{ + int i; + struct rb_node *node; + struct gk20a_fixed_alloc *falloc; + struct gk20a_buddy_allocator *a = __a->priv; + + __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n", + a->base, a->length, a->blk_size); + __alloc_pstat(s, __a, "Internal params:\n"); + __alloc_pstat(s, __a, " start = 0x%llx\n", a->start); + __alloc_pstat(s, __a, " end = 0x%llx\n", a->end); + __alloc_pstat(s, __a, " count = 0x%llx\n", a->count); + __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks); + __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order); + + __alloc_pstat(s, __a, "Buddy blocks:\n"); + __alloc_pstat(s, __a, " Order Free Alloced Split\n"); + __alloc_pstat(s, __a, " ----- ---- ------- -----\n"); + + if (lock) + alloc_lock(__a); + for (i = a->max_order; i >= 0; i--) { + if (a->buddy_list_len[i] == 0 && + a->buddy_list_alloced[i] == 0 && + a->buddy_list_split[i] == 0) + continue; + + __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i, + 
a->buddy_list_len[i], + a->buddy_list_alloced[i], + a->buddy_list_split[i]); + } + + __alloc_pstat(s, __a, "\n"); + + for (node = rb_first(&a->fixed_allocs), i = 1; + node != NULL; + node = rb_next(node)) { + falloc = container_of(node, + struct gk20a_fixed_alloc, alloced_entry); + + __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", + i, falloc->start, falloc->end); + } + + __alloc_pstat(s, __a, "\n"); + __alloc_pstat(s, __a, "Bytes allocated: %llu\n", + a->bytes_alloced); + __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n", + a->bytes_alloced_real); + __alloc_pstat(s, __a, "Bytes freed: %llu\n", + a->bytes_freed); + + if (lock) + alloc_unlock(__a); +} + +static const struct gk20a_allocator_ops buddy_ops = { + .alloc = gk20a_buddy_balloc, + .free = gk20a_buddy_bfree, + + .alloc_fixed = gk20a_balloc_fixed_buddy, + /* .free_fixed not needed. */ + + .base = gk20a_buddy_alloc_base, + .length = gk20a_buddy_alloc_length, + .end = gk20a_buddy_alloc_end, + .inited = gk20a_buddy_alloc_inited, + + .fini = gk20a_buddy_allocator_destroy, + + .print_stats = gk20a_buddy_print_stats, +}; + +/* + * Initialize a buddy allocator. Returns 0 on success. This allocator does + * not necessarily manage bytes. It manages distinct ranges of resources. This + * allows the allocator to work for things like comp_tags, semaphores, etc. + * + * @allocator: Ptr to an allocator struct to init. + * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to + * get PTE size for GVA spaces. + * @name: Name of the allocator. Doesn't have to be static storage. + * @base: The base address of the resource pool being managed. + * @size: Number of resources in the pool. + * @blk_size: Minimum number of resources to allocate at once. For things like + * semaphores this is 1. For GVA this might be as much as 64k. This + * corresponds to order 0. Must be power of 2. + * @max_order: Pick a maximum order. 
If you leave this as 0, the buddy allocator
+ * will try and pick a reasonable max order.
+ * @flags: Extra flags necessary. See GPU_BALLOC_*.
+ */
+int __gk20a_buddy_allocator_init(struct gk20a_allocator *__a,
+ struct vm_gk20a *vm, const char *name,
+ u64 base, u64 size, u64 blk_size,
+ u64 max_order, u64 flags)
+{
+ int err;
+ struct gk20a_buddy_allocator *a;
+
+ /* blk_size must be greater than 0 and a power of 2. */
+ if (blk_size == 0)
+ return -EINVAL;
+ if (blk_size & (blk_size - 1))
+ return -EINVAL;
+
+ if (max_order > GPU_BALLOC_MAX_ORDER)
+ return -EINVAL;
+
+ /* If this is to manage a GVA space we need a VM. */
+ if (flags & GPU_BALLOC_GVA_SPACE && !vm)
+ return -EINVAL;
+
+ a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL);
+ if (!a)
+ return -ENOMEM;
+
+ err = __gk20a_alloc_common_init(__a, name, a, &buddy_ops);
+ if (err)
+ goto fail;
+
+ a->base = base;
+ a->length = size;
+ a->blk_size = blk_size;
+ a->blk_shift = __ffs(blk_size);
+ a->owner = __a;
+
+ /*
+ * If base is 0 then modify base to be the size of one block so that we
+ * can return errors by returning addr == 0.
+ */
+ if (a->base == 0) {
+ a->base = a->blk_size;
+ a->length -= a->blk_size;
+ }
+
+ a->vm = vm;
+ if (flags & GPU_BALLOC_GVA_SPACE)
+ a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10);
+
+ a->flags = flags;
+ a->max_order = max_order;
+
+ balloc_allocator_align(a);
+ balloc_compute_max_order(a);
+
+ /* Shared buddy kmem_cache for all allocators. 
*/ + if (!buddy_cache) + buddy_cache = KMEM_CACHE(gk20a_buddy, 0); + if (!buddy_cache) { + err = -ENOMEM; + goto fail; + } + + a->alloced_buddies = RB_ROOT; + a->fixed_allocs = RB_ROOT; + err = balloc_init_lists(a); + if (err) + goto fail; + + a->initialized = 1; + + gk20a_init_alloc_debug(__a); + alloc_dbg(__a, "New allocator: type buddy\n"); + alloc_dbg(__a, " base 0x%llx\n", a->base); + alloc_dbg(__a, " size 0x%llx\n", a->length); + alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); + alloc_dbg(__a, " max_order %llu\n", a->max_order); + alloc_dbg(__a, " flags 0x%llx\n", a->flags); + + return 0; + +fail: + kfree(a); + return err; +} + +int gk20a_buddy_allocator_init(struct gk20a_allocator *a, const char *name, + u64 base, u64 size, u64 blk_size, u64 flags) +{ + return __gk20a_buddy_allocator_init(a, NULL, name, + base, size, blk_size, 0, 0); +} -- cgit v1.2.2