From b6569319c772d84087a0a1a6d7146bdcae8e9aab Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Fri, 24 Jun 2016 14:12:24 -0700 Subject: gpu: nvgpu: Support multiple types of allocators Support multiple types of allocation backends. Currently there is only one allocator implementation available: a buddy allocator. Buddy allocators have certain limitations though. For one the allocator requires metadata to be allocated from the kernel's system memory. This causes a given buddy allocation to potentially sleep on a kmalloc() call. This patch has been created so that a new backend can be created which will avoid any dynamic system memory management routines from being called. Bug 1781897 Change-Id: I98d6c8402c049942f13fee69c6901a166f177f65 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1172115 GVS: Gerrit_Virtual_Submit Reviewed-by: Konsta Holtta Reviewed-by: Yu-Huan Hsu --- drivers/gpu/nvgpu/gk20a/as_gk20a.c | 9 +- drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | 500 ++++++++++++++++++++---------- drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | 169 +++++++--- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 112 +++---- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 7 +- drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 26 +- drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 22 +- 7 files changed, 554 insertions(+), 291 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index 0571ca1f..8144ec6e 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c @@ -279,16 +279,17 @@ static int gk20a_as_ioctl_get_va_regions( for (i = 0; i < write_entries; ++i) { struct nvgpu_as_va_region region; - struct gk20a_allocator *vma = vm->fixed.init ? + struct gk20a_allocator *vma = + gk20a_alloc_initialized(&vm->fixed) ? &vm->fixed : &vm->vma[i]; memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); region.page_size = vm->gmmu_page_sizes[i]; - region.offset = vma->base; + region.offset = gk20a_alloc_base(vma); /* No __aeabi_uldivmod() on some platforms... */ - region.pages = (vma->end - vma->start) >> - ilog2(region.page_size); + region.pages = (gk20a_alloc_end(vma) - + gk20a_alloc_base(vma)) >> ilog2(region.page_size); if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) return -EFAULT; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c index d3a9202b..f2164768 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c @@ -17,43 +17,58 @@ */ #include -#include #include -#include #include "platform_gk20a.h" #include "gk20a_allocator.h" #include "mm_gk20a.h" -static struct dentry *balloc_debugfs_root; +static struct dentry *gk20a_alloc_debugfs_root; static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ -static u32 balloc_tracing_on; +u32 gk20a_alloc_tracing_on; -#define balloc_trace_func() \ +#define gk20a_alloc_trace_func() \ do { \ - if (balloc_tracing_on) \ + if (gk20a_alloc_tracing_on) \ trace_printk("%s\n", __func__); \ } while (0) -#define balloc_trace_func_done() \ +#define gk20a_alloc_trace_func_done() \ do { \ - if (balloc_tracing_on) \ + if (gk20a_alloc_tracing_on) \ trace_printk("%s_done\n", __func__); \ } while (0) - -static void balloc_init_alloc_debug(struct gk20a_allocator *a); -static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, - int lock); -static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, +/* + * Buddy allocator implementation. 
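+ *
+ * The gk20a_alloc()/gk20a_free() wrappers defined further down dispatch
+ * through a gk20a_allocator_ops table, so callers never name a backend
+ * directly. A rough usage sketch (the sizes and the "demo" name are
+ * illustrative only, not part of this patch):
+ *
+ *   struct gk20a_allocator va;
+ *
+ *   err = gk20a_buddy_allocator_init(&va, "demo", SZ_1M, SZ_16M,
+ *                                    SZ_4K, 0);
+ *   addr = gk20a_alloc(&va, SZ_64K);
+ *   gk20a_free(&va, addr);
+ *   gk20a_alloc_destroy(&va);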
+ */ +static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len); +static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr); +static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a, + u64 base, u64 len); +static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a); +static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a); +static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a); +static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a); + +static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a); +static void gk20a_buddy_print_stats(struct gk20a_allocator *__a, + struct seq_file *s, int lock); + +/* Some other buddy allocator functions. */ +static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, u64 addr); -static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b); -static void __balloc_do_free_fixed(struct gk20a_allocator *a, +static void balloc_coalesce(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b); +static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, struct gk20a_fixed_alloc *falloc); +/* Debugging. */ +static void gk20a_init_alloc_debug(struct gk20a_allocator *a); + /* * This function is not present in older kernel's list.h code. */ @@ -62,6 +77,23 @@ static void __balloc_do_free_fixed(struct gk20a_allocator *a, list_entry((ptr)->prev, type, member) #endif +static const struct gk20a_allocator_ops buddy_ops = { + .alloc = gk20a_buddy_alloc, + .free = gk20a_buddy_free, + + .alloc_fixed = gk20a_buddy_alloc_fixed, + /* .free_fixed not needed. */ + + .base = gk20a_buddy_alloc_base, + .length = gk20a_buddy_alloc_length, + .end = gk20a_buddy_alloc_end, + .inited = gk20a_buddy_alloc_inited, + + .fini = gk20a_buddy_allocator_destroy, + + .print_stats = gk20a_buddy_print_stats, +}; + /* * GPU buddy allocator for various address spaces. * @@ -80,13 +112,95 @@ static void __balloc_do_free_fixed(struct gk20a_allocator *a, * easily PDE aligned so this hasn't been a problem. */ +static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a) +{ + struct gk20a_buddy_allocator *ba = a->priv; + + return ba->length; +} + +static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a) +{ + struct gk20a_buddy_allocator *ba = a->priv; + + return ba->start; +} + +static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a) +{ + struct gk20a_buddy_allocator *ba = a->priv; + + return ba->inited; +} +static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a) +{ + struct gk20a_buddy_allocator *ba = a->priv; + + return ba->end; +} + +u64 gk20a_alloc_length(struct gk20a_allocator *a) +{ + return a->ops->length(a); +} + +u64 gk20a_alloc_base(struct gk20a_allocator *a) +{ + return a->ops->base(a); +} + +u64 gk20a_alloc_initialized(struct gk20a_allocator *a) +{ + if (!a->ops) + return 0; + + return a->ops->inited(a); +} + +u64 gk20a_alloc_end(struct gk20a_allocator *a) +{ + return a->ops->end(a); +} + +u64 gk20a_alloc(struct gk20a_allocator *a, u64 len) +{ + return a->ops->alloc(a, len); +} + +void gk20a_free(struct gk20a_allocator *a, u64 addr) +{ + a->ops->free(a, addr); +} + +u64 gk20a_alloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) +{ + return a->ops->alloc_fixed(a, base, len); +} + +void gk20a_free_fixed(struct gk20a_allocator *a, u64 base, u64 len) +{ + /* + * If this operation is not defined for the allocator then just do + * nothing. The alternative would be to fall back on the regular + * free but that may be harmful in unexpected ways. 
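+ *
+ * With the buddy backend below, for instance, free_fixed is left NULL
+ * because gk20a_buddy_free() already looks up fixed allocs before
+ * falling back to the regular buddy path; for that backend this call
+ * is simply a no-op.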
+ */ + if (a->ops->free_fixed) + a->ops->free_fixed(a, base, len); +} + +void gk20a_alloc_destroy(struct gk20a_allocator *a) +{ + a->ops->fini(a); + memset(a, 0, sizeof(*a)); +} + /* * Pick a suitable maximum order for this allocator. * * Hueristic: Just guessing that the best max order is the largest single * block that will fit in the address space. */ -static void balloc_compute_max_order(struct gk20a_allocator *a) +static void balloc_compute_max_order(struct gk20a_buddy_allocator *a) { u64 true_max_order = ilog2(a->blks); @@ -105,9 +219,10 @@ static void balloc_compute_max_order(struct gk20a_allocator *a) * Since we can only allocate in chucks of a->blk_size we need to trim off * any excess data that is not aligned to a->blk_size. */ -static void balloc_allocator_align(struct gk20a_allocator *a) +static void balloc_allocator_align(struct gk20a_buddy_allocator *a) { a->start = ALIGN(a->base, a->blk_size); + WARN_ON(a->start != a->base); a->end = (a->base + a->length) & ~(a->blk_size - 1); a->count = a->end - a->start; a->blks = a->count >> a->blk_shift; @@ -116,7 +231,7 @@ static void balloc_allocator_align(struct gk20a_allocator *a) /* * Pass NULL for parent if you want a top level buddy. */ -static struct gk20a_buddy *balloc_new_buddy(struct gk20a_allocator *a, +static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a, struct gk20a_buddy *parent, u64 start, u64 order) { @@ -136,13 +251,14 @@ static struct gk20a_buddy *balloc_new_buddy(struct gk20a_allocator *a, return new_buddy; } -static void __balloc_buddy_list_add(struct gk20a_allocator *a, +static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a, struct gk20a_buddy *b, struct list_head *list) { if (buddy_is_in_list(b)) { - balloc_dbg(a, "Oops: adding added buddy (%llu:0x%llx)\n", - b->order, b->start); + alloc_dbg(balloc_owner(a), + "Oops: adding added buddy (%llu:0x%llx)\n", + b->order, b->start); BUG(); } @@ -160,12 +276,13 @@ static void __balloc_buddy_list_add(struct gk20a_allocator *a, buddy_set_in_list(b); } -static void __balloc_buddy_list_rem(struct gk20a_allocator *a, +static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a, struct gk20a_buddy *b) { if (!buddy_is_in_list(b)) { - balloc_dbg(a, "Oops: removing removed buddy (%llu:0x%llx)\n", - b->order, b->start); + alloc_dbg(balloc_owner(a), + "Oops: removing removed buddy (%llu:0x%llx)\n", + b->order, b->start); BUG(); } @@ -177,19 +294,21 @@ static void __balloc_buddy_list_rem(struct gk20a_allocator *a, * Add a buddy to one of the buddy lists and deal with the necessary * book keeping. Adds the buddy to the list specified by the buddy's order. 
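 *
 * For example, with blk_size = 4 KB an order 3 buddy covers
 * (1 << 3) * 4 KB = 32 KB and lives on buddy_list[3], with
 * buddy_list_len[3] counting the free buddies of that order.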
*/ -static void balloc_blist_add(struct gk20a_allocator *a, struct gk20a_buddy *b) +static void balloc_blist_add(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) { __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); a->buddy_list_len[b->order]++; } -static void balloc_blist_rem(struct gk20a_allocator *a, struct gk20a_buddy *b) +static void balloc_blist_rem(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) { __balloc_buddy_list_rem(a, b); a->buddy_list_len[b->order]--; } -static u64 balloc_get_order(struct gk20a_allocator *a, u64 len) +static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len) { if (len == 0) return 0; @@ -200,7 +319,8 @@ static u64 balloc_get_order(struct gk20a_allocator *a, u64 len) return fls(len); } -static u64 __balloc_max_order_in(struct gk20a_allocator *a, u64 start, u64 end) +static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a, + u64 start, u64 end) { u64 size = (end - start) >> a->blk_shift; @@ -213,7 +333,7 @@ static u64 __balloc_max_order_in(struct gk20a_allocator *a, u64 start, u64 end) /* * Initialize the buddy lists. */ -static int balloc_init_lists(struct gk20a_allocator *a) +static int balloc_init_lists(struct gk20a_buddy_allocator *a) { int i; u64 bstart, bend, order; @@ -252,6 +372,26 @@ cleanup: return -ENOMEM; } +/* + * Handle the common init stuff for a gk20a_allocator. + */ +static int __gk20a_alloc_common_init(struct gk20a_allocator *a, + const char *name, void *priv, + const struct gk20a_allocator_ops *ops) +{ + if (!ops) + return -EINVAL; + + a->ops = ops; + a->priv = priv; + + mutex_init(&a->lock); + + strlcpy(a->name, name, sizeof(a->name)); + + return 0; +} + /* * Initialize a buddy allocator. Returns 0 on success. This allocator does * not necessarily manage bytes. It manages distinct ranges of resources. This @@ -270,20 +410,40 @@ cleanup: * will try and pick a reasonable max order. * @flags: Extra flags necessary. See GPU_BALLOC_*. */ -int __gk20a_allocator_init(struct gk20a_allocator *a, - struct vm_gk20a *vm, const char *name, - u64 base, u64 size, u64 blk_size, u64 max_order, - u64 flags) +int __gk20a_buddy_allocator_init(struct gk20a_allocator *__a, + struct vm_gk20a *vm, const char *name, + u64 base, u64 size, u64 blk_size, + u64 max_order, u64 flags) { int err; + struct gk20a_buddy_allocator *a; + + /* blk_size must be greater than 0 and a power of 2. */ + if (blk_size == 0) + return -EINVAL; + if (blk_size & (blk_size - 1)) + return -EINVAL; - memset(a, 0, sizeof(struct gk20a_allocator)); - strncpy(a->name, name, 32); + if (max_order > GPU_BALLOC_MAX_ORDER) + return -EINVAL; + + /* If this is to manage a GVA space we need a VM. */ + if (flags & GPU_BALLOC_GVA_SPACE && !vm) + return -EINVAL; + + a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL); + if (!a) + return -ENOMEM; + + err = __gk20a_alloc_common_init(__a, name, a, &buddy_ops); + if (err) + goto fail; a->base = base; a->length = size; a->blk_size = blk_size; a->blk_shift = __ffs(blk_size); + a->owner = __a; /* * If base is 0 then modfy base to be the size of one block so that we @@ -294,19 +454,6 @@ int __gk20a_allocator_init(struct gk20a_allocator *a, a->length -= a->blk_size; } - /* blk_size must be greater than 0 and a power of 2. */ - if (blk_size == 0) - return -EINVAL; - if (blk_size & (blk_size - 1)) - return -EINVAL; - - if (max_order > GPU_BALLOC_MAX_ORDER) - return -EINVAL; - - /* If this is to manage a GVA space we need a VM. 
*/ - if (flags & GPU_BALLOC_GVA_SPACE && !vm) - return -EINVAL; - a->vm = vm; if (flags & GPU_BALLOC_GVA_SPACE) a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10); @@ -320,49 +467,55 @@ int __gk20a_allocator_init(struct gk20a_allocator *a, /* Shared buddy kmem_cache for all allocators. */ if (!buddy_cache) buddy_cache = KMEM_CACHE(gk20a_buddy, 0); - if (!buddy_cache) - return -ENOMEM; + if (!buddy_cache) { + err = -ENOMEM; + goto fail; + } a->alloced_buddies = RB_ROOT; + a->fixed_allocs = RB_ROOT; err = balloc_init_lists(a); if (err) - return err; - - mutex_init(&a->lock); + goto fail; - a->init = 1; + a->inited = 1; - balloc_init_alloc_debug(a); - balloc_dbg(a, "New allocator: base 0x%llx\n", a->base); - balloc_dbg(a, " size 0x%llx\n", a->length); - balloc_dbg(a, " blk_size 0x%llx\n", a->blk_size); - balloc_dbg(a, " max_order %llu\n", a->max_order); - balloc_dbg(a, " flags 0x%llx\n", a->flags); + gk20a_init_alloc_debug(__a); + alloc_dbg(__a, "New allocator: base 0x%llx\n", a->base); + alloc_dbg(__a, " size 0x%llx\n", a->length); + alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); + alloc_dbg(__a, " max_order %llu\n", a->max_order); + alloc_dbg(__a, " flags 0x%llx\n", a->flags); return 0; + +fail: + kfree(a); + return err; } -int gk20a_allocator_init(struct gk20a_allocator *a, const char *name, - u64 base, u64 size, u64 blk_size) +int gk20a_buddy_allocator_init(struct gk20a_allocator *a, const char *name, + u64 base, u64 size, u64 blk_size, u64 flags) { - return __gk20a_allocator_init(a, NULL, name, - base, size, blk_size, 0, 0); + return __gk20a_buddy_allocator_init(a, NULL, name, + base, size, blk_size, 0, 0); } /* * Clean up and destroy the passed allocator. */ -void gk20a_allocator_destroy(struct gk20a_allocator *a) +static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a) { + int i; struct rb_node *node; struct gk20a_buddy *bud; struct gk20a_fixed_alloc *falloc; - int i; + struct gk20a_buddy_allocator *a = __a->priv; - balloc_lock(a); + alloc_lock(__a); - if (!IS_ERR_OR_NULL(a->debugfs_entry)) - debugfs_remove(a->debugfs_entry); + if (!IS_ERR_OR_NULL(__a->debugfs_entry)) + debugfs_remove(__a->debugfs_entry); /* * Free the fixed allocs first. @@ -415,16 +568,9 @@ void gk20a_allocator_destroy(struct gk20a_allocator *a) } } - a->init = 0; + kfree(a); - balloc_unlock(a); - - /* - * We cant unlock an allocator after memsetting it. That wipes the - * state of the mutex. Hopefully no one uses the allocator after - * destroying it... - */ - memset(a, 0, sizeof(struct gk20a_allocator)); + alloc_unlock(__a); } /* @@ -433,7 +579,8 @@ void gk20a_allocator_destroy(struct gk20a_allocator *a) * * @a must be locked. */ -static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b) +static void balloc_coalesce(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) { struct gk20a_buddy *parent; @@ -473,8 +620,8 @@ static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b) * * @a must be locked. */ -static int balloc_split_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b, - int pte_size) +static int balloc_split_buddy(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b, int pte_size) { struct gk20a_buddy *left, *right; u64 half; @@ -521,7 +668,8 @@ static int balloc_split_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b, * * @a must be locked. 
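 *
 * Allocated buddies are stored in the alloced_buddies rb-tree keyed by
 * start address; that is what lets gk20a_buddy_free() map a bare
 * address back to its buddy in O(log n).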
*/ -static void balloc_alloc_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b) +static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) { struct rb_node **new = &(a->alloced_buddies.rb_node); struct rb_node *parent = NULL; @@ -552,7 +700,7 @@ static void balloc_alloc_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b) * * @a must be locked. */ -static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, +static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, u64 addr) { struct rb_node *node = a->alloced_buddies.rb_node; @@ -582,7 +730,7 @@ static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, /* * Find a suitable buddy for the given order and PTE type (big or little). */ -static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_allocator *a, +static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a, u64 order, int pte_size) { struct gk20a_buddy *bud; @@ -615,7 +763,8 @@ static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_allocator *a, * * @a must be locked. */ -static u64 __balloc_do_alloc(struct gk20a_allocator *a, u64 order, int pte_size) +static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a, + u64 order, int pte_size) { u64 split_order; struct gk20a_buddy *bud = NULL; @@ -644,21 +793,22 @@ static u64 __balloc_do_alloc(struct gk20a_allocator *a, u64 order, int pte_size) /* * Allocate memory from the passed allocator. */ -u64 gk20a_balloc(struct gk20a_allocator *a, u64 len) +static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len) { u64 order, addr; int pte_size; + struct gk20a_buddy_allocator *a = __a->priv; - balloc_trace_func(); + gk20a_alloc_trace_func(); - balloc_lock(a); + alloc_lock(__a); order = balloc_get_order(a, len); if (order > a->max_order) { - balloc_unlock(a); - balloc_dbg(a, "Alloc fail\n"); - balloc_trace_func_done(); + alloc_unlock(__a); + alloc_dbg(balloc_owner(a), "Alloc fail\n"); + gk20a_alloc_trace_func_done(); return 0; } @@ -681,18 +831,19 @@ u64 gk20a_balloc(struct gk20a_allocator *a, u64 len) if (addr) { a->bytes_alloced += len; a->bytes_alloced_real += balloc_order_to_len(a, order); - balloc_dbg(a, "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", - addr, order, len, + alloc_dbg(balloc_owner(a), + "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", + addr, order, len, pte_size == gmmu_page_size_big ? "big" : pte_size == gmmu_page_size_small ? "small" : "NA/any"); } else { - balloc_dbg(a, "Alloc failed: no mem!\n"); + alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n"); } - balloc_unlock(a); + alloc_unlock(__a); - balloc_trace_func_done(); + gk20a_alloc_trace_func_done(); return addr; } @@ -703,7 +854,8 @@ u64 gk20a_balloc(struct gk20a_allocator *a, u64 len) * TODO: Right now this uses the unoptimal approach of going through all * outstanding allocations and checking their base/ends. This could be better. 
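 *
 * Example: if a buddy spanning [0x4000, 0x8000) is outstanding, a query
 * for [0x6000, 0xa000) overlaps it and the range is reported busy.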
*/ -static int balloc_is_range_free(struct gk20a_allocator *a, u64 base, u64 end) +static int balloc_is_range_free(struct gk20a_buddy_allocator *a, + u64 base, u64 end) { struct rb_node *node; struct gk20a_buddy *bud; @@ -728,7 +880,7 @@ static int balloc_is_range_free(struct gk20a_allocator *a, u64 base, u64 end) return 1; } -static void balloc_alloc_fixed(struct gk20a_allocator *a, +static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a, struct gk20a_fixed_alloc *f) { struct rb_node **new = &(a->fixed_allocs.rb_node); @@ -758,8 +910,8 @@ static void balloc_alloc_fixed(struct gk20a_allocator *a, * * @a must be locked. */ -static struct gk20a_fixed_alloc *balloc_free_fixed(struct gk20a_allocator *a, - u64 addr) +static struct gk20a_fixed_alloc *balloc_free_fixed( + struct gk20a_buddy_allocator *a, u64 addr) { struct rb_node *node = a->fixed_allocs.rb_node; struct gk20a_fixed_alloc *falloc; @@ -788,7 +940,7 @@ static struct gk20a_fixed_alloc *balloc_free_fixed(struct gk20a_allocator *a, * Find the parent range - doesn't necessarily need the parent to actually exist * as a buddy. Finding an existing parent comes later... */ -static void __balloc_get_parent_range(struct gk20a_allocator *a, +static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a, u64 base, u64 order, u64 *pbase, u64 *porder) { @@ -808,8 +960,8 @@ static void __balloc_get_parent_range(struct gk20a_allocator *a, * Makes a buddy at the passed address. This will make all parent buddies * necessary for this buddy to exist as well. */ -static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a, - u64 base, u64 order) +static struct gk20a_buddy *__balloc_make_fixed_buddy( + struct gk20a_buddy_allocator *a, u64 base, u64 order) { struct gk20a_buddy *bud = NULL; struct list_head *order_list; @@ -843,7 +995,7 @@ static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a, } if (cur_order > a->max_order) { - balloc_dbg(a, "No buddy for range ???\n"); + alloc_dbg(balloc_owner(a), "No buddy for range ???\n"); return NULL; } @@ -864,7 +1016,7 @@ static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a, return bud; } -static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a, +static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a, struct gk20a_fixed_alloc *falloc, u64 base, u64 len) { @@ -880,7 +1032,8 @@ static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a, __fls(len >> a->blk_shift)); if (align_order > a->max_order) { - balloc_dbg(a, "Align order too big: %llu > %llu\n", + alloc_dbg(balloc_owner(a), + "Align order too big: %llu > %llu\n", align_order, a->max_order); return 0; } @@ -898,7 +1051,8 @@ static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a, balloc_base_unshift(a, inc_base), align_order); if (!bud) { - balloc_dbg(a, "Fixed buddy failed: {0x%llx, %llu}!\n", + alloc_dbg(balloc_owner(a), + "Fixed buddy failed: {0x%llx, %llu}!\n", balloc_base_unshift(a, inc_base), align_order); goto err_and_cleanup; @@ -943,13 +1097,15 @@ err_and_cleanup: * * Please do not use this function unless _absolutely_ necessary. 
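 *
 * Illustrative call sequence (the 512-page size mirrors the semaphore
 * pool mapping elsewhere in this patch; the names are abbreviated):
 *
 *   gpu_va = gk20a_alloc_fixed(vma, va_limit - kernel_size,
 *                              512 * PAGE_SIZE);
 *   ...
 *   gk20a_free(vma, gpu_va);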
*/ -u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) +static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a, + u64 base, u64 len) { - struct gk20a_fixed_alloc *falloc = NULL; - struct gk20a_buddy *bud; u64 ret, real_bytes = 0; + struct gk20a_buddy *bud; + struct gk20a_fixed_alloc *falloc = NULL; + struct gk20a_buddy_allocator *a = __a->priv; - balloc_trace_func(); + gk20a_alloc_trace_func(); /* If base isn't aligned to an order 0 block, fail. */ if (base & (a->blk_size - 1)) @@ -966,16 +1122,18 @@ u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) falloc->start = base; falloc->end = base + len; - balloc_lock(a); + alloc_lock(__a); if (!balloc_is_range_free(a, base, base + len)) { - balloc_dbg(a, "Range not free: 0x%llx -> 0x%llx\n", + alloc_dbg(balloc_owner(a), + "Range not free: 0x%llx -> 0x%llx\n", base, base + len); goto fail_unlock; } ret = __balloc_do_alloc_fixed(a, falloc, base, len); if (!ret) { - balloc_dbg(a, "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", + alloc_dbg(balloc_owner(a), + "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", base, base + len); goto fail_unlock; } @@ -988,21 +1146,21 @@ u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) a->bytes_alloced += len; a->bytes_alloced_real += real_bytes; - balloc_unlock(a); - balloc_dbg(a, "Alloc (fixed) 0x%llx\n", base); + alloc_unlock(__a); + alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base); - balloc_trace_func_done(); + gk20a_alloc_trace_func_done(); return base; fail_unlock: - balloc_unlock(a); + alloc_unlock(__a); fail: kfree(falloc); - balloc_trace_func_done(); + gk20a_alloc_trace_func_done(); return 0; } -static void __balloc_do_free_fixed(struct gk20a_allocator *a, +static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, struct gk20a_fixed_alloc *falloc) { struct gk20a_buddy *bud; @@ -1029,19 +1187,20 @@ static void __balloc_do_free_fixed(struct gk20a_allocator *a, /* * Free the passed allocation. */ -void gk20a_bfree(struct gk20a_allocator *a, u64 addr) +static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr) { struct gk20a_buddy *bud; struct gk20a_fixed_alloc *falloc; + struct gk20a_buddy_allocator *a = __a->priv; - balloc_trace_func(); + gk20a_alloc_trace_func(); if (!addr) { - balloc_trace_func_done(); + gk20a_alloc_trace_func_done(); return; } - balloc_lock(a); + alloc_lock(__a); /* * First see if this is a fixed alloc. If not fall back to a regular @@ -1066,9 +1225,9 @@ void gk20a_bfree(struct gk20a_allocator *a, u64 addr) balloc_coalesce(a, bud); done: - balloc_unlock(a); - balloc_dbg(a, "Free 0x%llx\n", addr); - balloc_trace_func_done(); + alloc_unlock(__a); + alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr); + gk20a_alloc_trace_func_done(); return; } @@ -1077,49 +1236,42 @@ done: * stats are printed to the kernel log. This lets this code be used for * debugging purposes internal to the allocator. */ -static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, - int lock) +static void gk20a_buddy_print_stats(struct gk20a_allocator *__a, + struct seq_file *s, int lock) { -#define __balloc_pstat(s, fmt, arg...) 
\ - do { \ - if (s) \ - seq_printf(s, fmt, ##arg); \ - else \ - balloc_dbg(a, fmt, ##arg); \ - } while (0) - int i; struct rb_node *node; struct gk20a_fixed_alloc *falloc; + struct gk20a_buddy_allocator *a = __a->priv; - __balloc_pstat(s, "base = %llu, limit = %llu, blk_size = %llu\n", - a->base, a->length, a->blk_size); - __balloc_pstat(s, "Internal params:\n"); - __balloc_pstat(s, " start = 0x%llx\n", a->start); - __balloc_pstat(s, " end = 0x%llx\n", a->end); - __balloc_pstat(s, " count = 0x%llx\n", a->count); - __balloc_pstat(s, " blks = 0x%llx\n", a->blks); - __balloc_pstat(s, " max_order = %llu\n", a->max_order); + __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n", + a->base, a->length, a->blk_size); + __alloc_pstat(s, __a, "Internal params:\n"); + __alloc_pstat(s, __a, " start = 0x%llx\n", a->start); + __alloc_pstat(s, __a, " end = 0x%llx\n", a->end); + __alloc_pstat(s, __a, " count = 0x%llx\n", a->count); + __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks); + __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order); - __balloc_pstat(s, "Buddy blocks:\n"); - __balloc_pstat(s, " Order Free Alloced Split\n"); - __balloc_pstat(s, " ----- ---- ------- -----\n"); + __alloc_pstat(s, __a, "Buddy blocks:\n"); + __alloc_pstat(s, __a, " Order Free Alloced Split\n"); + __alloc_pstat(s, __a, " ----- ---- ------- -----\n"); if (lock) - balloc_lock(a); + alloc_lock(__a); for (i = a->max_order; i >= 0; i--) { if (a->buddy_list_len[i] == 0 && a->buddy_list_alloced[i] == 0 && a->buddy_list_split[i] == 0) continue; - __balloc_pstat(s, " %3d %-7llu %-9llu %llu\n", i, - a->buddy_list_len[i], - a->buddy_list_alloced[i], - a->buddy_list_split[i]); + __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i, + a->buddy_list_len[i], + a->buddy_list_alloced[i], + a->buddy_list_split[i]); } - __balloc_pstat(s, "\n"); + __alloc_pstat(s, __a, "\n"); for (node = rb_first(&a->fixed_allocs), i = 1; node != NULL; @@ -1127,27 +1279,33 @@ static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, falloc = container_of(node, struct gk20a_fixed_alloc, alloced_entry); - __balloc_pstat(s, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", - i, falloc->start, falloc->end); + __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", + i, falloc->start, falloc->end); } - __balloc_pstat(s, "\n"); - __balloc_pstat(s, "Bytes allocated: %llu\n", a->bytes_alloced); - __balloc_pstat(s, "Bytes allocated (real): %llu\n", - a->bytes_alloced_real); - __balloc_pstat(s, "Bytes freed: %llu\n", a->bytes_freed); + __alloc_pstat(s, __a, "\n"); + __alloc_pstat(s, __a, "Bytes allocated: %llu\n", + a->bytes_alloced); + __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n", + a->bytes_alloced_real); + __alloc_pstat(s, __a, "Bytes freed: %llu\n", + a->bytes_freed); if (lock) - balloc_unlock(a); + alloc_unlock(__a); +} -#undef __balloc_pstats +void gk20a_alloc_print_stats(struct gk20a_allocator *__a, + struct seq_file *s, int lock) +{ + __a->ops->print_stats(__a, s, lock); } static int __alloc_show(struct seq_file *s, void *unused) { struct gk20a_allocator *a = s->private; - balloc_print_stats(a, s, 1); + gk20a_alloc_print_stats(a, s, 1); return 0; } @@ -1164,13 +1322,13 @@ static const struct file_operations __alloc_fops = { .release = single_release, }; -static void balloc_init_alloc_debug(struct gk20a_allocator *a) +static void gk20a_init_alloc_debug(struct gk20a_allocator *a) { - if (!balloc_debugfs_root) + if (!gk20a_alloc_debugfs_root) return; a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, - 
balloc_debugfs_root, + gk20a_alloc_debugfs_root, a, &__alloc_fops); } @@ -1180,11 +1338,11 @@ void gk20a_alloc_debugfs_init(struct platform_device *pdev) struct gk20a_platform *platform = platform_get_drvdata(pdev); struct dentry *gpu_root = platform->debugfs; - balloc_debugfs_root = debugfs_create_dir("allocators", gpu_root); - if (IS_ERR_OR_NULL(balloc_debugfs_root)) + gk20a_alloc_debugfs_root = debugfs_create_dir("allocators", gpu_root); + if (IS_ERR_OR_NULL(gk20a_alloc_debugfs_root)) return; - debugfs_create_u32("tracing", 0664, balloc_debugfs_root, - &balloc_tracing_on); + debugfs_create_u32("tracing", 0664, gk20a_alloc_debugfs_root, + &gk20a_alloc_tracing_on); } #endif diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h index e86e053b..74e23e6c 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h @@ -20,10 +20,49 @@ #include #include #include +#include #include /* #define ALLOCATOR_DEBUG */ +struct gk20a_allocator; +struct vm_gk20a; + +/* + * Operations for an allocator to implement. + */ +struct gk20a_allocator_ops { + u64 (*alloc)(struct gk20a_allocator *allocator, u64 len); + void (*free)(struct gk20a_allocator *allocator, u64 addr); + + /* + * Special interface to allocate a memory region with a specific + * starting address. Yikes. Note: if free() works for freeing both + * regular and fixed allocations then free_fixed() does not need to + * be implemented. This behavior exists for legacy reasons and should + * not be propagated to new allocators. + */ + u64 (*alloc_fixed)(struct gk20a_allocator *allocator, + u64 base, u64 len); + void (*free_fixed)(struct gk20a_allocator *allocator, + u64 base, u64 len); + + /* + * Returns info about the allocator. + */ + u64 (*base)(struct gk20a_allocator *allocator); + u64 (*length)(struct gk20a_allocator *allocator); + u64 (*end)(struct gk20a_allocator *allocator); + int (*inited)(struct gk20a_allocator *allocator); + + /* Destructor. */ + void (*fini)(struct gk20a_allocator *allocator); + + /* Debugging. */ + void (*print_stats)(struct gk20a_allocator *allocator, + struct seq_file *s, int lock); +}; + /* * Each buddy is an element in a binary tree. */ @@ -97,8 +136,6 @@ struct gk20a_fixed_alloc { u64 end; /* End address. */ }; -struct vm_gk20a; - /* * GPU buddy allocator for the various GPU address spaces. Each addressable unit * doesn't have to correspond to a byte. In some cases each unit is a more @@ -109,12 +146,10 @@ struct vm_gk20a; * * order_size is the size of an order 0 buddy. */ -struct gk20a_allocator { - +struct gk20a_buddy_allocator { + struct gk20a_allocator *owner; /* Owner of this buddy allocator. */ struct vm_gk20a *vm; /* Parent VM - can be NULL. */ - char name[32]; /* Name of allocator. */ - u64 base; /* Base address of the space. */ u64 length; /* Length of the space. */ u64 blk_size; /* Size of order 0 allocation. */ @@ -132,11 +167,6 @@ struct gk20a_allocator { struct rb_root alloced_buddies; /* Outstanding allocations. */ struct rb_root fixed_allocs; /* Outstanding fixed allocations. */ - struct mutex lock; /* Protects buddy access. */ - -#define GPU_BALLOC_GVA_SPACE 0x1 - u64 flags; - /* * Impose an upper bound on the maximum order. 
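 *
 * Example: a 64 GB space of 4 KB blocks holds 2^24 blocks, so
 * ilog2(blks) = 24; balloc_compute_max_order() clamps max_order to
 * that, and the init path rejects anything above GPU_BALLOC_MAX_ORDER.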
*/ @@ -155,52 +185,121 @@ struct gk20a_allocator { */ u64 pte_blk_order; - struct dentry *debugfs_entry; + int inited; + +#define GPU_BALLOC_GVA_SPACE 0x1 + u64 flags; u64 bytes_alloced; u64 bytes_alloced_real; u64 bytes_freed; }; -#define balloc_lock(a) mutex_lock(&(a)->lock) -#define balloc_unlock(a) mutex_unlock(&(a)->lock) +struct gk20a_allocator { + char name[32]; + struct mutex lock; -#define balloc_get_order_list(a, order) (&(a)->buddy_list[(order)]) -#define balloc_order_to_len(a, order) ((1 << order) * (a)->blk_size) -#define balloc_base_shift(a, base) ((base) - (a)->start) -#define balloc_base_unshift(a, base) ((base) + (a)->start) + void *priv; + const struct gk20a_allocator_ops *ops; -int gk20a_allocator_init(struct gk20a_allocator *allocator, - const char *name, u64 base, u64 size, u64 order0); -int __gk20a_allocator_init(struct gk20a_allocator *allocator, - struct vm_gk20a *vm, const char *name, - u64 base, u64 size, u64 order0, - u64 max_order, u64 flags); -void gk20a_allocator_destroy(struct gk20a_allocator *allocator); + struct dentry *debugfs_entry; +}; + +static inline void alloc_lock(struct gk20a_allocator *a) +{ + mutex_lock(&a->lock); +} + +static inline void alloc_unlock(struct gk20a_allocator *a) +{ + mutex_unlock(&a->lock); +} + +static inline struct gk20a_buddy_allocator *buddy_allocator( + struct gk20a_allocator *a) +{ + return (struct gk20a_buddy_allocator *)a->priv; +} + +static inline struct list_head *balloc_get_order_list( + struct gk20a_buddy_allocator *a, int order) +{ + return &a->buddy_list[order]; +} + +static inline u64 balloc_order_to_len(struct gk20a_buddy_allocator *a, + int order) +{ + return (1 << order) * a->blk_size; +} + +static inline u64 balloc_base_shift(struct gk20a_buddy_allocator *a, + u64 base) +{ + return base - a->start; +} + +static inline u64 balloc_base_unshift(struct gk20a_buddy_allocator *a, + u64 base) +{ + return base + a->start; +} + +static inline struct gk20a_allocator *balloc_owner( + struct gk20a_buddy_allocator *a) +{ + return a->owner; +} /* - * Normal alloc/free operations for the buddy allocator. + * Buddy allocator specific initializers. */ -u64 gk20a_balloc(struct gk20a_allocator *allocator, u64 len); -void gk20a_bfree(struct gk20a_allocator *allocator, u64 addr); +int __gk20a_buddy_allocator_init(struct gk20a_allocator *a, + struct vm_gk20a *vm, const char *name, + u64 base, u64 size, u64 blk_size, + u64 max_order, u64 flags); +int gk20a_buddy_allocator_init(struct gk20a_allocator *allocator, + const char *name, u64 base, u64 size, + u64 blk_size, u64 flags); /* - * Special interface to allocate a memory regions with a specific starting - * address. Yikes. + * Allocator APIs. */ -u64 gk20a_balloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len); +u64 gk20a_alloc(struct gk20a_allocator *allocator, u64 len); +void gk20a_free(struct gk20a_allocator *allocator, u64 addr); + +u64 gk20a_alloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len); +void gk20a_free_fixed(struct gk20a_allocator *allocator, u64 base, u64 len); + +u64 gk20a_alloc_base(struct gk20a_allocator *a); +u64 gk20a_alloc_length(struct gk20a_allocator *a); +u64 gk20a_alloc_end(struct gk20a_allocator *a); +u64 gk20a_alloc_initialized(struct gk20a_allocator *a); + +void gk20a_alloc_destroy(struct gk20a_allocator *allocator); + +void gk20a_alloc_print_stats(struct gk20a_allocator *a, + struct seq_file *s, int lock); /* - * Debugfs init. + * Debug stuff. 
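+ *
+ * __alloc_pstat() below prints to the seq_file when one is passed in
+ * (the debugfs path) and otherwise falls back to alloc_dbg(), so a
+ * single print_stats() implementation serves both consumers.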
*/ void gk20a_alloc_debugfs_init(struct platform_device *pdev); +#define __alloc_pstat(seq, allocator, fmt, arg...) \ + do { \ + if (s) \ + seq_printf(seq, fmt, ##arg); \ + else \ + alloc_dbg(allocator, fmt, ##arg); \ + } while (0) + #if defined(ALLOCATOR_DEBUG) -#define balloc_dbg(alloctor, format, arg...) \ +#define alloc_dbg(allocator, format, arg...) \ pr_info("%-25s %25s() " format, \ - alloctor->name, __func__, ##arg) + allocator->name, __func__, ##arg) #else -#define balloc_dbg(allocator, format, arg...) +#define alloc_dbg(allocator, format, arg...) #endif #endif /* GK20A_ALLOCATOR_H */ diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 673aafda..ffc695f5 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -1331,7 +1331,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); - offset = gk20a_balloc(vma, size); + offset = gk20a_alloc(vma, size); if (!offset) { gk20a_err(dev_from_vm(vm), "%s oom: sz=0x%llx", vma->name, size); @@ -1350,7 +1350,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm, gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", vma->name, offset, size); - gk20a_bfree(vma, offset); + gk20a_free(vma, offset); return 0; } @@ -3407,12 +3407,12 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) * * !!! TODO: cleanup. */ - sema_sea->gpu_va = gk20a_balloc_fixed(&vm->vma[gmmu_page_size_kernel], - vm->va_limit - - mm->channel.kernel_size, - 512 * PAGE_SIZE); + sema_sea->gpu_va = gk20a_alloc_fixed(&vm->vma[gmmu_page_size_kernel], + vm->va_limit - + mm->channel.kernel_size, + 512 * PAGE_SIZE); if (!sema_sea->gpu_va) { - gk20a_bfree(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va); + gk20a_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va); gk20a_vm_put(vm); return -ENOMEM; } @@ -3420,7 +3420,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) err = gk20a_semaphore_pool_map(vm->sema_pool, vm); if (err) { gk20a_semaphore_pool_unmap(vm->sema_pool, vm); - gk20a_bfree(&vm->vma[gmmu_page_size_small], + gk20a_free(&vm->vma[gmmu_page_size_small], vm->sema_pool->gpu_va); gk20a_vm_put(vm); } @@ -3542,13 +3542,13 @@ int gk20a_init_vm(struct mm_gk20a *mm, snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-fixed", name); - err = __gk20a_allocator_init(&vm->fixed, - vm, alloc_name, - small_vma_start, - g->separate_fixed_allocs, - SZ_4K, - GPU_BALLOC_MAX_ORDER, - GPU_BALLOC_GVA_SPACE); + err = __gk20a_buddy_allocator_init(&vm->fixed, + vm, alloc_name, + small_vma_start, + g->separate_fixed_allocs, + SZ_4K, + GPU_BALLOC_MAX_ORDER, + GPU_BALLOC_GVA_SPACE); if (err) goto clean_up_ptes; @@ -3559,13 +3559,14 @@ int gk20a_init_vm(struct mm_gk20a *mm, if (small_vma_start < small_vma_limit) { snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); - err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], - vm, alloc_name, - small_vma_start, - small_vma_limit - small_vma_start, - SZ_4K, - GPU_BALLOC_MAX_ORDER, - GPU_BALLOC_GVA_SPACE); + err = __gk20a_buddy_allocator_init( + &vm->vma[gmmu_page_size_small], + vm, alloc_name, + small_vma_start, + small_vma_limit - small_vma_start, + SZ_4K, + GPU_BALLOC_MAX_ORDER, + GPU_BALLOC_GVA_SPACE); if (err) goto clean_up_ptes; } @@ -3573,13 +3574,14 @@ int gk20a_init_vm(struct mm_gk20a *mm, if (large_vma_start < large_vma_limit) { snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 
10); - err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], - vm, alloc_name, - large_vma_start, - large_vma_limit - large_vma_start, - big_page_size, - GPU_BALLOC_MAX_ORDER, - GPU_BALLOC_GVA_SPACE); + err = __gk20a_buddy_allocator_init( + &vm->vma[gmmu_page_size_big], + vm, alloc_name, + large_vma_start, + large_vma_limit - large_vma_start, + big_page_size, + GPU_BALLOC_MAX_ORDER, + GPU_BALLOC_GVA_SPACE); if (err) goto clean_up_small_allocator; } @@ -3589,13 +3591,13 @@ int gk20a_init_vm(struct mm_gk20a *mm, /* * kernel reserved VMA is at the end of the aperture */ - err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel], - vm, alloc_name, - kernel_vma_start, - kernel_vma_limit - kernel_vma_start, - SZ_4K, - GPU_BALLOC_MAX_ORDER, - GPU_BALLOC_GVA_SPACE); + err = __gk20a_buddy_allocator_init(&vm->vma[gmmu_page_size_kernel], + vm, alloc_name, + kernel_vma_start, + kernel_vma_limit - kernel_vma_start, + SZ_4K, + GPU_BALLOC_MAX_ORDER, + GPU_BALLOC_GVA_SPACE); if (err) goto clean_up_big_allocator; @@ -3620,10 +3622,10 @@ int gk20a_init_vm(struct mm_gk20a *mm, clean_up_big_allocator: if (large_vma_start < large_vma_limit) - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); clean_up_small_allocator: if (small_vma_start < small_vma_limit) - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); clean_up_ptes: free_gmmu_pages(vm, &vm->pdb); clean_up_pdes: @@ -3730,15 +3732,15 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, vma = &vm->vma[pgsz_idx]; if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) { - if (vm->fixed.init) + if (gk20a_alloc_initialized(&vm->fixed)) vma = &vm->fixed; - vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset, - (u64)args->pages * - (u64)args->page_size); + vaddr_start = gk20a_alloc_fixed(vma, args->o_a.offset, + (u64)args->pages * + (u64)args->page_size); } else { - vaddr_start = gk20a_balloc(vma, - (u64)args->pages * - (u64)args->page_size); + vaddr_start = gk20a_alloc(vma, + (u64)args->pages * + (u64)args->page_size); } if (!vaddr_start) { @@ -3772,7 +3774,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, APERTURE_INVALID); if (!map_offset) { mutex_unlock(&vm->update_gmmu_lock); - gk20a_bfree(vma, vaddr_start); + gk20a_free(vma, vaddr_start); kfree(va_node); goto clean_up; } @@ -3807,11 +3809,11 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? 
gmmu_page_size_big : gmmu_page_size_small; - if (vm->fixed.init) + if (gk20a_alloc_initialized(&vm->fixed)) vma = &vm->fixed; else vma = &vm->vma[pgsz_idx]; - gk20a_bfree(vma, args->offset); + gk20a_free(vma, args->offset); mutex_lock(&vm->update_gmmu_lock); va_node = addr_to_reservation(vm, args->offset); @@ -3995,13 +3997,13 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, void gk20a_deinit_vm(struct vm_gk20a *vm) { - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]); - if (vm->vma[gmmu_page_size_big].init) - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); - if (vm->vma[gmmu_page_size_small].init) - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); - if (vm->fixed.init) - gk20a_allocator_destroy(&vm->fixed); + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); + if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_big])) + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); + if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_small])) + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); + if (gk20a_alloc_initialized(&vm->fixed)) + gk20a_alloc_destroy(&vm->fixed); gk20a_vm_free_entries(vm, &vm->pdb, 0); } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 2e9172c7..66e46480 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -424,12 +424,13 @@ static inline u64 __nv_gmmu_va_small_page_limit(void) static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr) { + struct gk20a_allocator *a = &vm->vma[gmmu_page_size_big]; + if (!vm->big_pages) return 0; - return addr >= vm->vma[gmmu_page_size_big].base && - addr < vm->vma[gmmu_page_size_big].base + - vm->vma[gmmu_page_size_big].length; + return addr >= gk20a_alloc_base(a) && + addr < gk20a_alloc_base(a) + gk20a_alloc_length(a); } /* diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 347d7158..a3898993 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -2856,8 +2856,8 @@ void gk20a_remove_pmu_support(struct pmu_gk20a *pmu) { gk20a_dbg_fn(""); - if (pmu->dmem.init) - gk20a_allocator_destroy(&pmu->dmem); + if (gk20a_alloc_initialized(&pmu->dmem)) + gk20a_alloc_destroy(&pmu->dmem); } static int gk20a_init_pmu_reset_enable_hw(struct gk20a *g) @@ -3503,7 +3503,7 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu) gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); if (!pmu->sample_buffer) - pmu->sample_buffer = gk20a_balloc(&pmu->dmem, + pmu->sample_buffer = gk20a_alloc(&pmu->dmem, 2 * sizeof(u16)); if (!pmu->sample_buffer) { gk20a_err(dev_from_gk20a(g), @@ -3605,7 +3605,7 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu, for (i = 0; i < PMU_QUEUE_COUNT; i++) pmu_queue_init(pmu, i, init); - if (!pmu->dmem.init) { + if (!gk20a_alloc_initialized(&pmu->dmem)) { /* Align start and end addresses */ u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init), PMU_DMEM_ALLOC_ALIGNMENT); @@ -3613,9 +3613,9 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu, pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & ~(PMU_DMEM_ALLOC_ALIGNMENT - 1); u32 size = end - start; - __gk20a_allocator_init(&pmu->dmem, NULL, "gk20a_pmu_dmem", - start, size, - PMU_DMEM_ALLOC_ALIGNMENT, 4, 0); + gk20a_buddy_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", + start, size, + PMU_DMEM_ALLOC_ALIGNMENT, 0); } pmu->pmu_ready = true; @@ -3752,12 +3752,12 @@ static int pmu_response_handle(struct pmu_gk20a *pmu, seq->callback = NULL; if 
(pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_in_a_ptr(seq)) != 0) - gk20a_bfree(&pmu->dmem, + gk20a_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_in_a_ptr(seq))); if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)) != 0) - gk20a_bfree(&pmu->dmem, + gk20a_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_out_a_ptr(seq))); @@ -4418,7 +4418,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, (u16)max(payload->in.size, payload->out.size)); *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = - gk20a_balloc(&pmu->dmem, + gk20a_alloc(&pmu->dmem, pv->pmu_allocation_get_dmem_size(pmu, in)); if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in))) goto clean_up; @@ -4443,7 +4443,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, if (payload->out.buf != payload->in.buf) { *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) = - gk20a_balloc(&pmu->dmem, + gk20a_alloc(&pmu->dmem, pv->pmu_allocation_get_dmem_size(pmu, out)); if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, out))) @@ -4474,10 +4474,10 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, clean_up: gk20a_dbg_fn("fail"); if (in) - gk20a_bfree(&pmu->dmem, + gk20a_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, in)); if (out) - gk20a_bfree(&pmu->dmem, + gk20a_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, out)); pmu_seq_release(pmu, seq); diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 2239fcbc..c6f42703 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -230,11 +230,11 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm) err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); WARN_ON(err || msg.ret); - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]); - if (vm->vma[gmmu_page_size_small].init) - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); - if (vm->vma[gmmu_page_size_big].init) - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); + if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_small])) + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); + if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_big])) + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); mutex_unlock(&vm->update_gmmu_lock); @@ -374,7 +374,8 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, gmmu_page_sizes[gmmu_page_size_small] >> 10); - err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], + err = __gk20a_buddy_allocator_init( + &vm->vma[gmmu_page_size_small], vm, name, small_vma_start, small_vma_limit - small_vma_start, @@ -388,7 +389,8 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, if (large_vma_start < large_vma_limit) { snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, gmmu_page_sizes[gmmu_page_size_big] >> 10); - err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], + err = __gk20a_buddy_allocator_init( + &vm->vma[gmmu_page_size_big], vm, name, large_vma_start, large_vma_limit - large_vma_start, @@ -404,7 +406,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, /* * kernel reserved VMA is at the end of the aperture */ - err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel], + err = __gk20a_buddy_allocator_init(&vm->vma[gmmu_page_size_kernel], vm, name, kernel_vma_start, kernel_vma_limit - kernel_vma_start, @@ 
-426,10 +428,10 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 
 clean_up_big_allocator:
 	if (large_vma_start < large_vma_limit)
-		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
+		gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]);
 clean_up_small_allocator:
 	if (small_vma_start < small_vma_limit)
-		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+		gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]);
 clean_up_share:
 	msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
 	msg.handle = platform->virt_handle;
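
Postscript example: the point of the ops table is that a backend which never
touches the kernel's dynamic memory allocators on its alloc path can now be
dropped in. The sketch below is hypothetical (every "bump_" name is invented
for illustration; only the gk20a_allocator_ops hooks and the lock helpers
come from this patch):

	/* A toy backend: no kmalloc()/kmem_cache_alloc() at alloc time. */
	struct gk20a_bump_allocator {
		u64 base;	/* First managed address. */
		u64 end;	/* One past the last managed address. */
		u64 next;	/* Next address to hand out. */
	};

	static u64 gk20a_bump_alloc(struct gk20a_allocator *__a, u64 len)
	{
		struct gk20a_bump_allocator *ba = __a->priv;
		u64 addr = 0;

		alloc_lock(__a);
		if (len && ba->next + len <= ba->end) {
			addr = ba->next;
			ba->next += len;
		}
		alloc_unlock(__a);

		return addr;
	}

	static void gk20a_bump_free(struct gk20a_allocator *__a, u64 addr)
	{
		/* Individual frees are unsupported in this toy backend. */
	}

	static const struct gk20a_allocator_ops bump_ops = {
		.alloc = gk20a_bump_alloc,
		.free = gk20a_bump_free,
		/*
		 * base/length/end/inited/fini/print_stats elided here; a
		 * real backend must fill them in, since the generic
		 * wrappers call them unconditionally.
		 */
	};

An init function would kzalloc() the private struct once (sleeping is fine at
init time) and then hand it to __gk20a_alloc_common_init(a, name, priv,
&bump_ops), exactly as the buddy initializer in this patch does.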