From 01f359f3f1bbf95d7ac4453a6c1de811bf0aee11 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Fri, 15 May 2015 09:13:28 -0700 Subject: Revert "Revert "gpu: nvgpu: New allocator for VA space"" This reverts commit 7eb42bc239dbd207208ff491c3fb65c3d83274d8. The original commit was actually fine. Change-Id: I564ce6530ac73fcfad17dcec9c53f0353b4f02d4 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/743300 (cherry picked from commit e99aa2485f8992eabe3556f3ebcb57bdc8ad91ff) Reviewed-on: http://git-master/r/743301 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/as_gk20a.c | 15 +- drivers/gpu/nvgpu/gk20a/gk20a.c | 2 + drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | 1167 +++++++++++++++++++++++++++-- drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | 213 ++++-- drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | 5 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 202 ++--- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 68 +- drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 4 +- drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | 15 +- drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 5 +- drivers/gpu/nvgpu/vgpu/ltc_vgpu.c | 5 +- drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 36 +- 13 files changed, 1385 insertions(+), 353 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index 63569008..eb18fa65 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c @@ -199,21 +199,14 @@ static int gk20a_as_ioctl_get_va_regions( for (i = 0; i < write_entries; ++i) { struct nvgpu_as_va_region region; - u32 base, limit; memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); - if (!vm->vma[i].constraint.enable) { - base = vm->vma[i].base; - limit = vm->vma[i].limit; - } else { - base = vm->vma[i].constraint.base; - limit = vm->vma[i].constraint.limit; - } - region.page_size = vm->gmmu_page_sizes[i]; - region.offset = (u64)base * region.page_size; - region.pages = limit - base; /* NOTE: limit is exclusive */ + region.offset = vm->vma[i].base; + /* No __aeabi_uldivmod() on some platforms... */ + region.pages = (vm->vma[i].end - vm->vma[i].start) >> + ilog2(region.page_size); if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) return -EFAULT; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index f3b5544f..2e88726a 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -59,6 +59,7 @@ #include "hw_fb_gk20a.h" #include "gk20a_scale.h" #include "dbg_gpu_gk20a.h" +#include "gk20a_allocator.h" #include "hal.h" #include "vgpu/vgpu.h" @@ -1532,6 +1533,7 @@ static int gk20a_probe(struct platform_device *dev) gr_gk20a_debugfs_init(gk20a); gk20a_pmu_debugfs_init(dev); gk20a_cde_debugfs_init(dev); + gk20a_alloc_debugfs_init(dev); #endif gk20a_init_gr(gk20a); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c index 675a98a2..56fb22df 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c @@ -1,7 +1,7 @@ /* * gk20a allocator * - * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -16,112 +16,1149 @@ * along with this program. If not, see . 
*/ +#include +#include +#include +#include + +#include "platform_gk20a.h" #include "gk20a_allocator.h" -#include -/* init allocator struct */ -int gk20a_allocator_init(struct gk20a_allocator *allocator, - const char *name, u32 start, u32 len) +#include "mm_gk20a.h" + +static struct dentry *balloc_debugfs_root; + +static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ + +static u32 balloc_tracing_on; + +#define balloc_trace_func() \ + do { \ + if (balloc_tracing_on) \ + trace_printk("%s\n", __func__); \ + } while (0) + +#define balloc_trace_func_done() \ + do { \ + if (balloc_tracing_on) \ + trace_printk("%s_done\n", __func__); \ + } while (0) + + +static void balloc_init_alloc_debug(struct gk20a_allocator *a); +static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, + int lock); +static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, + u64 addr); +static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b); +static void __balloc_do_free_fixed(struct gk20a_allocator *a, + struct gk20a_fixed_alloc *falloc); + +/* + * This function is not present in older kernel's list.h code. + */ +#ifndef list_last_entry +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) +#endif + +/* + * GPU buddy allocator for various address spaces. + * + * Current limitations: + * o A fixed allocation could potentially be made that borders PDEs with + * different PTE sizes. This would require that fixed buffer to have + * different sized PTEs for different parts of the allocation. Probably + * best to just require PDE alignment for fixed address allocs. + * + * o It is currently possible to make an allocator that has a buddy alignment + * out of sync with the PDE block size alignment. A simple example is a + * 32GB address space starting at byte 1. Every buddy is shifted off by 1 + * which means each buddy corresponf to more than one actual GPU page. The + * best way to fix this is probably just require PDE blocksize alignment + * for the start of the address space. At the moment all allocators are + * easily PDE aligned so this hasn't been a problem. + */ + +/* + * Pick a suitable maximum order for this allocator. + * + * Hueristic: Just guessing that the best max order is the largest single + * block that will fit in the address space. + */ +static void balloc_compute_max_order(struct gk20a_allocator *a) +{ + u64 true_max_order = ilog2(a->blks); + + if (a->max_order > true_max_order) + a->max_order = true_max_order; + if (a->max_order > GPU_BALLOC_MAX_ORDER) + a->max_order = GPU_BALLOC_MAX_ORDER; +} + +/* + * Since we can only allocate in chucks of a->blk_size we need to trim off + * any excess data that is not aligned to a->blk_size. + */ +static void balloc_allocator_align(struct gk20a_allocator *a) +{ + a->start = ALIGN(a->base, a->blk_size); + a->end = (a->base + a->length) & ~(a->blk_size - 1); + a->count = a->end - a->start; + a->blks = a->count >> a->blk_shift; +} + +/* + * Pass NULL for parent if you want a top level buddy. 
+ */ +static struct gk20a_buddy *balloc_new_buddy(struct gk20a_allocator *a, + struct gk20a_buddy *parent, + u64 start, u64 order) +{ + struct gk20a_buddy *new_buddy; + + new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL); + if (!new_buddy) + return NULL; + + memset(new_buddy, 0, sizeof(struct gk20a_buddy)); + + new_buddy->parent = parent; + new_buddy->start = start; + new_buddy->order = order; + new_buddy->end = start + (1 << order) * a->blk_size; + + return new_buddy; +} + +static void __balloc_buddy_list_add(struct gk20a_allocator *a, + struct gk20a_buddy *b, + struct list_head *list) +{ + if (buddy_is_in_list(b)) { + balloc_dbg(a, "Oops: adding added buddy (%llu:0x%llx)\n", + b->order, b->start); + BUG(); + } + + /* + * Add big PTE blocks to the tail, small to the head for GVA spaces. + * This lets the code that checks if there are available blocks check + * without cycling through the entire list. + */ + if (a->flags & GPU_BALLOC_GVA_SPACE && + b->pte_size == BALLOC_PTE_SIZE_BIG) + list_add_tail(&b->buddy_entry, list); + else + list_add(&b->buddy_entry, list); + + buddy_set_in_list(b); +} + +static void __balloc_buddy_list_rem(struct gk20a_allocator *a, + struct gk20a_buddy *b) +{ + if (!buddy_is_in_list(b)) { + balloc_dbg(a, "Oops: removing removed buddy (%llu:0x%llx)\n", + b->order, b->start); + BUG(); + } + + list_del_init(&b->buddy_entry); + buddy_clr_in_list(b); +} + +/* + * Add a buddy to one of the buddy lists and deal with the necessary + * book keeping. Adds the buddy to the list specified by the buddy's order. + */ +static void balloc_blist_add(struct gk20a_allocator *a, struct gk20a_buddy *b) +{ + __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); + a->buddy_list_len[b->order]++; +} + +static void balloc_blist_rem(struct gk20a_allocator *a, struct gk20a_buddy *b) +{ + __balloc_buddy_list_rem(a, b); + a->buddy_list_len[b->order]--; +} + +static u64 balloc_get_order(struct gk20a_allocator *a, u64 len) +{ + if (len == 0) + return 0; + + len--; + len >>= a->blk_shift; + + return fls(len); +} + +static u64 __balloc_max_order_in(struct gk20a_allocator *a, u64 start, u64 end) +{ + u64 size = (end - start) >> a->blk_shift; + + if (size > 0) + return min_t(u64, ilog2(size), a->max_order); + else + return GPU_BALLOC_MAX_ORDER; +} + +/* + * Initialize the buddy lists. + */ +static int balloc_init_lists(struct gk20a_allocator *a) +{ + int i; + u64 bstart, bend, order; + struct gk20a_buddy *buddy; + + bstart = a->start; + bend = a->end; + + /* First make sure the LLs are valid. */ + for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) + INIT_LIST_HEAD(balloc_get_order_list(a, i)); + + while (bstart < bend) { + order = __balloc_max_order_in(a, bstart, bend); + + buddy = balloc_new_buddy(a, NULL, bstart, order); + if (!buddy) + goto cleanup; + + balloc_blist_add(a, buddy); + bstart += balloc_order_to_len(a, order); + } + + return 0; + +cleanup: + for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { + if (!list_empty(balloc_get_order_list(a, i))) { + buddy = list_first_entry(balloc_get_order_list(a, i), + struct gk20a_buddy, buddy_entry); + balloc_blist_rem(a, buddy); + kmem_cache_free(buddy_cache, buddy); + } + } + + return -ENOMEM; +} + +/* + * Initialize a buddy allocator. Returns 0 on success. This allocator does + * not necessarily manage bytes. It manages distinct ranges of resources. This + * allows the allocator to work for things like comp_tags, semaphores, etc. + * + * @allocator: Ptr to an allocator struct to init. + * @vm: GPU VM to associate this allocator with. 
Can be NULL. Will be used to + * get PTE size for GVA spaces. + * @name: Name of the allocator. Doesn't have to be static storage. + * @base: The base address of the resource pool being managed. + * @size: Number of resources in the pool. + * @blk_size: Minimum number of resources to allocate at once. For things like + * semaphores this is 1. For GVA this might be as much as 64k. This + * corresponds to order 0. Must be power of 2. + * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator + * will try and pick a reasonable max order. + * @flags: Extra flags necessary. See GPU_BALLOC_*. + */ +int __gk20a_allocator_init(struct gk20a_allocator *a, + struct vm_gk20a *vm, const char *name, + u64 base, u64 size, u64 blk_size, u64 max_order, + u64 flags) { - memset(allocator, 0, sizeof(struct gk20a_allocator)); + int err; + + memset(a, 0, sizeof(struct gk20a_allocator)); + strncpy(a->name, name, 32); + + a->base = base; + a->length = size; + a->blk_size = blk_size; + a->blk_shift = __ffs(blk_size); + + /* blk_size must be greater than 0 and a power of 2. */ + if (blk_size == 0) + return -EINVAL; + if (blk_size & (blk_size - 1)) + return -EINVAL; + + if (max_order > GPU_BALLOC_MAX_ORDER) + return -EINVAL; + + /* If this is to manage a GVA space we need a VM. */ + if (flags & GPU_BALLOC_GVA_SPACE && !vm) + return -EINVAL; + + a->vm = vm; + if (flags & GPU_BALLOC_GVA_SPACE) + a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10); - strncpy(allocator->name, name, 32); + a->flags = flags; + a->max_order = max_order; - allocator->base = start; - allocator->limit = start + len - 1; + balloc_allocator_align(a); + balloc_compute_max_order(a); - allocator->bitmap = vzalloc(BITS_TO_LONGS(len) * sizeof(long)); - if (!allocator->bitmap) + /* Shared buddy kmem_cache for all allocators. */ + if (!buddy_cache) + buddy_cache = KMEM_CACHE(gk20a_buddy, 0); + if (!buddy_cache) return -ENOMEM; - allocator_dbg(allocator, "%s : base %d, limit %d", - allocator->name, allocator->base, allocator->limit); + a->alloced_buddies = RB_ROOT; + err = balloc_init_lists(a); + if (err) + return err; - init_rwsem(&allocator->rw_sema); + mutex_init(&a->lock); - allocator->alloc = gk20a_allocator_block_alloc; - allocator->free = gk20a_allocator_block_free; + a->init = 1; + + balloc_init_alloc_debug(a); + balloc_dbg(a, "New allocator: base 0x%llx\n", a->base); + balloc_dbg(a, " size 0x%llx\n", a->length); + balloc_dbg(a, " blk_size 0x%llx\n", a->blk_size); + balloc_dbg(a, " max_order %llu\n", a->max_order); + balloc_dbg(a, " flags 0x%llx\n", a->flags); return 0; } -/* destroy allocator, free all remaining blocks if any */ -void gk20a_allocator_destroy(struct gk20a_allocator *allocator) +int gk20a_allocator_init(struct gk20a_allocator *a, const char *name, + u64 base, u64 size, u64 blk_size) +{ + return __gk20a_allocator_init(a, NULL, name, + base, size, blk_size, 0, 0); +} + +/* + * Clean up and destroy the passed allocator. + */ +void gk20a_allocator_destroy(struct gk20a_allocator *a) { - down_write(&allocator->rw_sema); + struct rb_node *node; + struct gk20a_buddy *bud; + struct gk20a_fixed_alloc *falloc; + int i; + + balloc_lock(a); + + if (!IS_ERR_OR_NULL(a->debugfs_entry)) + debugfs_remove(a->debugfs_entry); + + /* + * Free the fixed allocs first. 
+ */ + while ((node = rb_first(&a->fixed_allocs)) != NULL) { + falloc = container_of(node, + struct gk20a_fixed_alloc, alloced_entry); + + __balloc_do_free_fixed(a, falloc); + rb_erase(node, &a->fixed_allocs); + } + + /* + * And now free all outstanding allocations. + */ + while ((node = rb_first(&a->alloced_buddies)) != NULL) { + bud = container_of(node, struct gk20a_buddy, alloced_entry); + balloc_free_buddy(a, bud->start); + balloc_blist_add(a, bud); + balloc_coalesce(a, bud); + } - vfree(allocator->bitmap); + /* + * Now clean up the unallocated buddies. + */ + for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { + BUG_ON(a->buddy_list_alloced[i] != 0); + + while (!list_empty(balloc_get_order_list(a, i))) { + bud = list_first_entry(balloc_get_order_list(a, i), + struct gk20a_buddy, buddy_entry); + balloc_blist_rem(a, bud); + kmem_cache_free(buddy_cache, bud); + } + + if (a->buddy_list_len[i] != 0) { + pr_info("Excess buddies!!! (%d: %llu)\n", + i, a->buddy_list_len[i]); + BUG(); + } + if (a->buddy_list_split[i] != 0) { + pr_info("Excess split nodes!!! (%d: %llu)\n", + i, a->buddy_list_split[i]); + BUG(); + } + if (a->buddy_list_alloced[i] != 0) { + pr_info("Excess alloced nodes!!! (%d: %llu)\n", + i, a->buddy_list_alloced[i]); + BUG(); + } + } - memset(allocator, 0, sizeof(struct gk20a_allocator)); + a->init = 0; + + balloc_unlock(a); + + /* + * We cant unlock an allocator after memsetting it. That wipes the + * state of the mutex. Hopefully no one uses the allocator after + * destroying it... + */ + memset(a, 0, sizeof(struct gk20a_allocator)); } /* - * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is - * returned to caller in *addr. + * Combine the passed buddy if possible. The pointer in @b may not be valid + * after this as the buddy may be freed. * - * contiguous allocation, which allocates one block of - * contiguous address. -*/ -int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, - u32 *addr, u32 len, u32 align) + * @a must be locked. + */ +static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b) { - unsigned long _addr; + struct gk20a_buddy *parent; - allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len); + if (buddy_is_alloced(b) || buddy_is_split(b)) + return; - if ((*addr != 0 && *addr < allocator->base) || /* check addr range */ - *addr + len > allocator->limit || /* check addr range */ - *addr & (align - 1) || /* check addr alignment */ - len == 0) /* check len */ - return -EINVAL; + /* + * If both our buddy and I are both not allocated and not split then + * we can coalesce ourselves. + */ + if (!b->buddy) + return; + if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy)) + return; + + parent = b->parent; + + balloc_blist_rem(a, b); + balloc_blist_rem(a, b->buddy); + + buddy_clr_split(parent); + a->buddy_list_split[parent->order]--; + balloc_blist_add(a, parent); + + /* + * Recursively coalesce as far as we can go. + */ + balloc_coalesce(a, parent); + + /* Clean up the remains. */ + kmem_cache_free(buddy_cache, b->buddy); + kmem_cache_free(buddy_cache, b); +} + +/* + * Split a buddy into two new buddies who are 1/2 the size of the parent buddy. + * + * @a must be locked. 
+ */ +static int balloc_split_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b, + int pte_size) +{ + struct gk20a_buddy *left, *right; + u64 half; - len = ALIGN(len, align); - if (!len) + left = balloc_new_buddy(a, b, b->start, b->order - 1); + if (!left) return -ENOMEM; - down_write(&allocator->rw_sema); + half = (b->end - b->start) / 2; - _addr = bitmap_find_next_zero_area(allocator->bitmap, - allocator->limit - allocator->base + 1, - *addr ? (*addr - allocator->base) : 0, - len, - align - 1); - if ((_addr > allocator->limit - allocator->base + 1) || - (*addr && *addr != (_addr + allocator->base))) { - up_write(&allocator->rw_sema); + right = balloc_new_buddy(a, b, b->start + half, b->order - 1); + if (!right) { + kmem_cache_free(buddy_cache, left); return -ENOMEM; } - bitmap_set(allocator->bitmap, _addr, len); - *addr = allocator->base + _addr; + buddy_set_split(b); + a->buddy_list_split[b->order]++; - up_write(&allocator->rw_sema); + b->left = left; + b->right = right; + left->buddy = right; + right->buddy = left; + left->parent = b; + right->parent = b; - allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len); + /* PTE considerations. */ + if (a->flags & GPU_BALLOC_GVA_SPACE && + left->order <= a->pte_blk_order) { + left->pte_size = pte_size; + right->pte_size = pte_size; + } + + balloc_blist_rem(a, b); + balloc_blist_add(a, left); + balloc_blist_add(a, right); return 0; } -/* free all blocks between start and end */ -int gk20a_allocator_block_free(struct gk20a_allocator *allocator, - u32 addr, u32 len, u32 align) +/* + * Place the passed buddy into the RB tree for allocated buddies. Never fails + * unless the passed entry is a duplicate which is a bug. + * + * @a must be locked. + */ +void balloc_alloc_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b) { - allocator_dbg(allocator, "[in] addr %d, len %d", addr, len); + struct rb_node **new = &(a->alloced_buddies.rb_node); + struct rb_node *parent = NULL; - if (addr + len > allocator->limit || /* check addr range */ - addr < allocator->base || - addr & (align - 1)) /* check addr alignment */ - return -EINVAL; + while (*new) { + struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy, + alloced_entry); - len = ALIGN(len, align); - if (!len) - return -EINVAL; + parent = *new; + if (b->start < bud->start) + new = &((*new)->rb_left); + else if (b->start > bud->start) + new = &((*new)->rb_right); + else + BUG_ON("Duplicate entries in allocated list!\n"); + } + + rb_link_node(&b->alloced_entry, parent, new); + rb_insert_color(&b->alloced_entry, &a->alloced_buddies); + + buddy_set_alloced(b); + a->buddy_list_alloced[b->order]++; +} + +/* + * Remove the passed buddy from the allocated buddy RB tree. Returns the + * deallocated buddy for further processing. + * + * @a must be locked. + */ +static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, + u64 addr) +{ + struct rb_node *node = a->alloced_buddies.rb_node; + struct gk20a_buddy *bud; + + while (node) { + bud = container_of(node, struct gk20a_buddy, alloced_entry); + + if (addr < bud->start) + node = node->rb_left; + else if (addr > bud->start) + node = node->rb_right; + else + break; + } + + if (!node) + return NULL; + + rb_erase(node, &a->alloced_buddies); + buddy_clr_alloced(bud); + a->buddy_list_alloced[bud->order]--; + + return bud; +} + +/* + * Find a suitable buddy for the given order and PTE type (big or little). 
+ */ +static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_allocator *a, + u64 order, int pte_size) +{ + struct gk20a_buddy *bud; + + if (list_empty(balloc_get_order_list(a, order))) + return NULL; + + if (a->flags & GPU_BALLOC_GVA_SPACE && + pte_size == BALLOC_PTE_SIZE_BIG) + bud = list_last_entry(balloc_get_order_list(a, order), + struct gk20a_buddy, buddy_entry); + else + bud = list_first_entry(balloc_get_order_list(a, order), + struct gk20a_buddy, buddy_entry); + + if (bud->pte_size != BALLOC_PTE_SIZE_ANY && + bud->pte_size != pte_size) + return NULL; + + return bud; +} + +/* + * Allocate a suitably sized buddy. If no suitable buddy exists split higher + * order buddies until we have a suitable buddy to allocate. + * + * For PDE grouping add an extra check to see if a buddy is suitable: that the + * buddy exists in a PDE who's PTE size is reasonable + * + * @a must be locked. + */ +static u64 __balloc_do_alloc(struct gk20a_allocator *a, u64 order, int pte_size) +{ + u64 split_order; + struct gk20a_buddy *bud; + + split_order = order; + while (!(bud = __balloc_find_buddy(a, split_order, pte_size))) + split_order++; + + while (bud->order != order) { + if (balloc_split_buddy(a, bud, pte_size)) + return 0; /* No mem... */ + bud = bud->left; + } + + balloc_blist_rem(a, bud); + balloc_alloc_buddy(a, bud); - down_write(&allocator->rw_sema); - bitmap_clear(allocator->bitmap, addr - allocator->base, len); - up_write(&allocator->rw_sema); + return bud->start; +} + +/* + * Allocate memory from the passed allocator. + */ +u64 gk20a_balloc(struct gk20a_allocator *a, u64 len) +{ + u64 order, addr; + int pte_size; + + balloc_trace_func(); + + balloc_lock(a); + + order = balloc_get_order(a, len); + + if (order > a->max_order) { + balloc_unlock(a); + balloc_dbg(a, "Alloc fail\n"); + balloc_trace_func_done(); + return 0; + } + + /* + * For now pass the base address of the allocator's region to + * __get_pte_size(). This ensures we get the right page size for + * the alloc but we don't have to know what the real address is + * going to be quite yet. + * + * TODO: once userspace supports a unified address space pass 0 for + * the base. This will make only 'len' affect the PTE size. + */ + if (a->flags & GPU_BALLOC_GVA_SPACE) + pte_size = __get_pte_size(a->vm, a->base, len); + else + pte_size = BALLOC_PTE_SIZE_ANY; + + addr = __balloc_do_alloc(a, order, pte_size); + + a->bytes_alloced += len; + a->bytes_alloced_real += balloc_order_to_len(a, order); + + balloc_unlock(a); + balloc_dbg(a, "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", + addr, order, len, + pte_size == gmmu_page_size_big ? "big" : + pte_size == gmmu_page_size_small ? "small" : + "NA/any"); + + balloc_trace_func_done(); + return addr; +} + +/* + * See if the passed range is actually available for allocation. If so, then + * return 1, otherwise return 0. + * + * TODO: Right now this uses the unoptimal approach of going through all + * outstanding allocations and checking their base/ends. This could be better. + */ +static int balloc_is_range_free(struct gk20a_allocator *a, u64 base, u64 end) +{ + struct rb_node *node; + struct gk20a_buddy *bud; + + node = rb_first(&a->alloced_buddies); + if (!node) + return 1; /* No allocs yet. 
*/ + + bud = container_of(node, struct gk20a_buddy, alloced_entry); + + while (bud->start < end) { + if ((bud->start > base && bud->start < end) || + (bud->end > base && bud->end < end)) + return 0; + + node = rb_next(node); + if (!node) + break; + bud = container_of(node, struct gk20a_buddy, alloced_entry); + } + + return 1; +} + +static void balloc_alloc_fixed(struct gk20a_allocator *a, + struct gk20a_fixed_alloc *f) +{ + struct rb_node **new = &(a->fixed_allocs.rb_node); + struct rb_node *parent = NULL; + + while (*new) { + struct gk20a_fixed_alloc *falloc = + container_of(*new, struct gk20a_fixed_alloc, + alloced_entry); + + parent = *new; + if (f->start < falloc->start) + new = &((*new)->rb_left); + else if (f->start > falloc->start) + new = &((*new)->rb_right); + else + BUG_ON("Duplicate entries in allocated list!\n"); + } + + rb_link_node(&f->alloced_entry, parent, new); + rb_insert_color(&f->alloced_entry, &a->fixed_allocs); +} + +/* + * Remove the passed buddy from the allocated buddy RB tree. Returns the + * deallocated buddy for further processing. + * + * @a must be locked. + */ +static struct gk20a_fixed_alloc *balloc_free_fixed(struct gk20a_allocator *a, + u64 addr) +{ + struct rb_node *node = a->fixed_allocs.rb_node; + struct gk20a_fixed_alloc *falloc; + + while (node) { + falloc = container_of(node, + struct gk20a_fixed_alloc, alloced_entry); + + if (addr < falloc->start) + node = node->rb_left; + else if (addr > falloc->start) + node = node->rb_right; + else + break; + } + + if (!node) + return NULL; + + rb_erase(node, &a->fixed_allocs); + + return falloc; +} + +/* + * Find the parent range - doesn't necessarily need the parent to actually exist + * as a buddy. Finding an existing parent comes later... + */ +static void __balloc_get_parent_range(struct gk20a_allocator *a, + u64 base, u64 order, + u64 *pbase, u64 *porder) +{ + u64 base_mask; + u64 shifted_base = balloc_base_shift(a, base); + + order++; + base_mask = ~((a->blk_size << order) - 1); + + shifted_base &= base_mask; + + *pbase = balloc_base_unshift(a, shifted_base); + *porder = order; +} + +/* + * Makes a buddy at the passed address. This will make all parent buddies + * necessary for this buddy to exist as well. + */ +static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a, + u64 base, u64 order) +{ + struct gk20a_buddy *bud = NULL; + struct list_head *order_list; + u64 cur_order = order, cur_base = base; + + /* + * Algo: + * 1. Keep jumping up a buddy order until we find the real buddy that + * this buddy exists in. + * 2. Then work our way down through the buddy tree until we hit a dead + * end. + * 3. Start splitting buddies until we split to the one we need to + * make. + */ + while (cur_order <= a->max_order) { + int found = 0; + + order_list = balloc_get_order_list(a, cur_order); + list_for_each_entry(bud, order_list, buddy_entry) { + if (bud->start == cur_base) { + found = 1; + break; + } + } + + if (found) + break; + + __balloc_get_parent_range(a, cur_base, cur_order, + &cur_base, &cur_order); + } + + if (cur_order > a->max_order) { + balloc_dbg(a, "No buddy for range ???\n"); + return NULL; + } + + /* Split this buddy as necessary until we get the target buddy. 
*/ + while (bud->start != base || bud->order != order) { + if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { + balloc_coalesce(a, bud); + return NULL; + } + + if (base < bud->right->start) + bud = bud->left; + else + bud = bud->right; + + } + + return bud; +} + +static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a, + struct gk20a_fixed_alloc *falloc, + u64 base, u64 len) +{ + u64 shifted_base, inc_base; + u64 align_order; + + shifted_base = balloc_base_shift(a, base); + if (shifted_base == 0) + align_order = __fls(len >> a->blk_shift); + else + align_order = min_t(u64, + __ffs(shifted_base >> a->blk_shift), + __fls(len >> a->blk_shift)); + + if (align_order > a->max_order) { + balloc_dbg(a, "Align order too big: %llu > %llu\n", + align_order, a->max_order); + return 0; + } + + /* + * Generate a list of buddies that satisfy this allocation. + */ + inc_base = shifted_base; + while (inc_base < (shifted_base + len)) { + u64 order_len = balloc_order_to_len(a, align_order); + u64 remaining; + struct gk20a_buddy *bud; + + bud = __balloc_make_fixed_buddy(a, + balloc_base_unshift(a, inc_base), + align_order); + if (!bud) { + balloc_dbg(a, "Fixed buddy failed: {0x%llx, %llu}!\n", + balloc_base_unshift(a, inc_base), + align_order); + goto err_and_cleanup; + } + + balloc_blist_rem(a, bud); + balloc_alloc_buddy(a, bud); + __balloc_buddy_list_add(a, bud, &falloc->buddies); + + /* Book keeping. */ + inc_base += order_len; + remaining = (shifted_base + len) - inc_base; + align_order = __ffs(inc_base >> a->blk_shift); + + /* If we don't have much left - trim down align_order. */ + if (balloc_order_to_len(a, align_order) > remaining) + align_order = __balloc_max_order_in(a, inc_base, + inc_base + remaining); + } + + return base; - allocator_dbg(allocator, "[out] addr %d, len %d", addr, len); +err_and_cleanup: + while (!list_empty(&falloc->buddies)) { + struct gk20a_buddy *bud = list_first_entry(&falloc->buddies, + struct gk20a_buddy, + buddy_entry); + + __balloc_buddy_list_rem(a, bud); + balloc_free_buddy(a, bud->start); + kmem_cache_free(buddy_cache, bud); + } + + return 0; +} + +/* + * Allocate a fixed address allocation. The address of the allocation is @base + * and the length is @len. This is not a typical buddy allocator operation and + * as such has a high posibility of failure if the address space is heavily in + * use. + * + * Please do not use this function unless _absolutely_ necessary. + */ +u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) +{ + struct gk20a_fixed_alloc *falloc = NULL; + struct gk20a_buddy *bud; + u64 ret, real_bytes = 0; + + balloc_trace_func(); + + /* If base isn't aligned to an order 0 block, fail. */ + if (base & (a->blk_size - 1)) + goto fail; + + if (len == 0) + goto fail; + + falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); + if (!falloc) + goto fail; + + INIT_LIST_HEAD(&falloc->buddies); + falloc->start = base; + falloc->end = base + len; + + balloc_lock(a); + if (!balloc_is_range_free(a, base, base + len)) { + balloc_dbg(a, "Range not free: 0x%llx -> 0x%llx\n", + base, base + len); + goto fail_unlock; + } + + ret = __balloc_do_alloc_fixed(a, falloc, base, len); + if (!ret) { + balloc_dbg(a, "Alloc-fixed failed ?? 
0x%llx -> 0x%llx\n", + base, base + len); + goto fail_unlock; + } + + balloc_alloc_fixed(a, falloc); + + list_for_each_entry(bud, &falloc->buddies, buddy_entry) + real_bytes += (bud->end - bud->start); + + a->bytes_alloced += len; + a->bytes_alloced_real += real_bytes; + + balloc_unlock(a); + balloc_dbg(a, "Alloc (fixed) 0x%llx\n", base); + + balloc_trace_func_done(); + return base; + +fail_unlock: + balloc_unlock(a); +fail: + kfree(falloc); + balloc_trace_func_done(); + return 0; +} + +static void __balloc_do_free_fixed(struct gk20a_allocator *a, + struct gk20a_fixed_alloc *falloc) +{ + struct gk20a_buddy *bud; + + while (!list_empty(&falloc->buddies)) { + bud = list_first_entry(&falloc->buddies, + struct gk20a_buddy, + buddy_entry); + __balloc_buddy_list_rem(a, bud); + + balloc_free_buddy(a, bud->start); + balloc_blist_add(a, bud); + a->bytes_freed += balloc_order_to_len(a, bud->order); + + /* + * Attemp to defrag the allocation. + */ + balloc_coalesce(a, bud); + } + + kfree(falloc); +} + +/* + * Free the passed allocation. + */ +void gk20a_bfree(struct gk20a_allocator *a, u64 addr) +{ + struct gk20a_buddy *bud; + struct gk20a_fixed_alloc *falloc; + + balloc_trace_func(); + + if (!addr) { + balloc_trace_func_done(); + return; + } + + balloc_lock(a); + + /* + * First see if this is a fixed alloc. If not fall back to a regular + * buddy. + */ + falloc = balloc_free_fixed(a, addr); + if (falloc) { + __balloc_do_free_fixed(a, falloc); + goto done; + } + + bud = balloc_free_buddy(a, addr); + if (!bud) + goto done; + + balloc_blist_add(a, bud); + a->bytes_freed += balloc_order_to_len(a, bud->order); + + /* + * Attemp to defrag the allocation. + */ + balloc_coalesce(a, bud); + +done: + balloc_unlock(a); + balloc_dbg(a, "Free 0x%llx\n", addr); + balloc_trace_func_done(); + return; +} + +/* + * Print the buddy allocator top level stats. If you pass @s as NULL then the + * stats are printed to the kernel log. This lets this code be used for + * debugging purposes internal to the allocator. + */ +static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, + int lock) +{ +#define __balloc_pstat(s, fmt, arg...) 
\ + do { \ + if (s) \ + seq_printf(s, fmt, ##arg); \ + else \ + balloc_dbg(a, fmt, ##arg); \ + } while (0) + + int i; + struct rb_node *node; + struct gk20a_fixed_alloc *falloc; + + __balloc_pstat(s, "base = %llu, limit = %llu, blk_size = %llu\n", + a->base, a->length, a->blk_size); + __balloc_pstat(s, "Internal params:\n"); + __balloc_pstat(s, " start = %llu\n", a->start); + __balloc_pstat(s, " end = %llu\n", a->end); + __balloc_pstat(s, " count = %llu\n", a->count); + __balloc_pstat(s, " blks = %llu\n", a->blks); + __balloc_pstat(s, " max_order = %llu\n", a->max_order); + + __balloc_pstat(s, "Buddy blocks:\n"); + __balloc_pstat(s, " Order Free Alloced Split\n"); + __balloc_pstat(s, " ----- ---- ------- -----\n"); + + if (lock) + balloc_lock(a); + for (i = a->max_order; i >= 0; i--) { + if (a->buddy_list_len[i] == 0 && + a->buddy_list_alloced[i] == 0 && + a->buddy_list_split[i] == 0) + continue; + + __balloc_pstat(s, " %3d %-7llu %-9llu %llu\n", i, + a->buddy_list_len[i], + a->buddy_list_alloced[i], + a->buddy_list_split[i]); + } + + __balloc_pstat(s, "\n"); + + for (node = rb_first(&a->fixed_allocs), i = 1; + node != NULL; + node = rb_next(node)) { + falloc = container_of(node, + struct gk20a_fixed_alloc, alloced_entry); + + __balloc_pstat(s, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", + i, falloc->start, falloc->end); + } + + __balloc_pstat(s, "\n"); + __balloc_pstat(s, "Bytes allocated: %llu\n", a->bytes_alloced); + __balloc_pstat(s, "Bytes allocated (real): %llu\n", + a->bytes_alloced_real); + __balloc_pstat(s, "Bytes freed: %llu\n", a->bytes_freed); + + if (lock) + balloc_unlock(a); + +#undef __balloc_pstats +} + +static int __alloc_show(struct seq_file *s, void *unused) +{ + struct gk20a_allocator *a = s->private; + + balloc_print_stats(a, s, 1); return 0; } + +static int __alloc_open(struct inode *inode, struct file *file) +{ + return single_open(file, __alloc_show, inode->i_private); +} + +static const struct file_operations __alloc_fops = { + .open = __alloc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void balloc_init_alloc_debug(struct gk20a_allocator *a) +{ + if (!balloc_debugfs_root) + return; + + a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, + balloc_debugfs_root, + a, &__alloc_fops); +} + +void gk20a_alloc_debugfs_init(struct platform_device *pdev) +{ + struct gk20a_platform *platform = platform_get_drvdata(pdev); + struct dentry *gpu_root = platform->debugfs; + + balloc_debugfs_root = debugfs_create_dir("allocators", gpu_root); + if (IS_ERR_OR_NULL(balloc_debugfs_root)) + return; + + debugfs_create_u32("tracing", 0664, balloc_debugfs_root, + &balloc_tracing_on); +} diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h index 69a227bd..e86e053b 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -17,75 +17,190 @@ #ifndef GK20A_ALLOCATOR_H #define GK20A_ALLOCATOR_H +#include #include -#include -#include +#include +#include /* #define ALLOCATOR_DEBUG */ -/* main struct */ +/* + * Each buddy is an element in a binary tree. + */ +struct gk20a_buddy { + struct gk20a_buddy *parent; /* Parent node. 
*/ + struct gk20a_buddy *buddy; /* This node's buddy. */ + struct gk20a_buddy *left; /* Lower address sub-node. */ + struct gk20a_buddy *right; /* Higher address sub-node. */ + + struct list_head buddy_entry; /* List entry for various lists. */ + struct rb_node alloced_entry; /* RB tree of allocations. */ + + u64 start; /* Start address of this buddy. */ + u64 end; /* End address of this buddy. */ + u64 order; /* Buddy order. */ + +#define BALLOC_BUDDY_ALLOCED 0x1 +#define BALLOC_BUDDY_SPLIT 0x2 +#define BALLOC_BUDDY_IN_LIST 0x4 + int flags; /* List of associated flags. */ + + /* + * Size of the PDE this buddy is using. This allows for grouping like + * sized allocations into the same PDE. + */ +#define BALLOC_PTE_SIZE_ANY 0x0 +#define BALLOC_PTE_SIZE_SMALL 0x1 +#define BALLOC_PTE_SIZE_BIG 0x2 + int pte_size; +}; + +#define __buddy_flag_ops(flag, flag_up) \ + static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \ + { \ + return b->flags & BALLOC_BUDDY_ ## flag_up; \ + } \ + static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \ + { \ + b->flags |= BALLOC_BUDDY_ ## flag_up; \ + } \ + static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \ + { \ + b->flags &= ~BALLOC_BUDDY_ ## flag_up; \ + } + +/* + * int buddy_is_alloced(struct gk20a_buddy *b); + * void buddy_set_alloced(struct gk20a_buddy *b); + * void buddy_clr_alloced(struct gk20a_buddy *b); + * + * int buddy_is_split(struct gk20a_buddy *b); + * void buddy_set_split(struct gk20a_buddy *b); + * void buddy_clr_split(struct gk20a_buddy *b); + * + * int buddy_is_in_list(struct gk20a_buddy *b); + * void buddy_set_in_list(struct gk20a_buddy *b); + * void buddy_clr_in_list(struct gk20a_buddy *b); + */ +__buddy_flag_ops(alloced, ALLOCED); +__buddy_flag_ops(split, SPLIT); +__buddy_flag_ops(in_list, IN_LIST); + +/* + * Keeps info for a fixed allocation. + */ +struct gk20a_fixed_alloc { + struct list_head buddies; /* List of buddies. */ + struct rb_node alloced_entry; /* RB tree of fixed allocations. */ + + u64 start; /* Start of fixed block. */ + u64 end; /* End address. */ +}; + +struct vm_gk20a; + +/* + * GPU buddy allocator for the various GPU address spaces. Each addressable unit + * doesn't have to correspond to a byte. In some cases each unit is a more + * complex object such as a comp_tag line or the like. + * + * The max order is computed based on the size of the minimum order and the size + * of the address space. + * + * order_size is the size of an order 0 buddy. + */ struct gk20a_allocator { - char name[32]; /* name for allocator */ - struct rb_root rb_root; /* rb tree root for blocks */ + struct vm_gk20a *vm; /* Parent VM - can be NULL. */ - u32 base; /* min value of this linear space */ - u32 limit; /* max value = limit - 1 */ + char name[32]; /* Name of allocator. */ - unsigned long *bitmap; /* bitmap */ + u64 base; /* Base address of the space. */ + u64 length; /* Length of the space. */ + u64 blk_size; /* Size of order 0 allocation. */ + u64 blk_shift; /* Shift to divide by blk_size. */ - struct gk20a_alloc_block *block_first; /* first block in list */ - struct gk20a_alloc_block *block_recent; /* last visited block */ + int init; /* Non-zero if initialized. */ - u32 first_free_addr; /* first free addr, non-contigous - allocation preferred start, - in order to pick up small holes */ - u32 last_free_addr; /* last free addr, contiguous - allocation preferred start */ - u32 cached_hole_size; /* max free hole size up to - last_free_addr */ - u32 block_count; /* number of blocks */ + /* Internal stuff. 
*/ + u64 start; /* Real start (aligned to blk_size). */ + u64 end; /* Real end, trimmed if needed. */ + u64 count; /* Count of objects in space. */ + u64 blks; /* Count of blks in the space. */ + u64 max_order; /* Specific maximum order. */ - struct rw_semaphore rw_sema; /* lock */ - struct kmem_cache *block_cache; /* slab cache */ + struct rb_root alloced_buddies; /* Outstanding allocations. */ + struct rb_root fixed_allocs; /* Outstanding fixed allocations. */ - /* if enabled, constrain to [base, limit) */ - struct { - bool enable; - u32 base; - u32 limit; - } constraint; + struct mutex lock; /* Protects buddy access. */ - int (*alloc)(struct gk20a_allocator *allocator, - u32 *addr, u32 len, u32 align); - int (*free)(struct gk20a_allocator *allocator, - u32 addr, u32 len, u32 align); +#define GPU_BALLOC_GVA_SPACE 0x1 + u64 flags; -}; + /* + * Impose an upper bound on the maximum order. + */ +#define GPU_BALLOC_MAX_ORDER 31 +#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1) -int gk20a_allocator_init(struct gk20a_allocator *allocator, - const char *name, u32 base, u32 size); -void gk20a_allocator_destroy(struct gk20a_allocator *allocator); + struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN]; + u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN]; + u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN]; + u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN]; -int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator, - u32 *addr, u32 len, u32 align); + /* + * This is for when the allocator is managing a GVA space (the + * GPU_BALLOC_GVA_SPACE bit is set in @flags). This requires + * that we group like sized allocations into PDE blocks. + */ + u64 pte_blk_order; -int gk20a_allocator_block_free(struct gk20a_allocator *allocator, - u32 addr, u32 len, u32 align); + struct dentry *debugfs_entry; -#if defined(ALLOCATOR_DEBUG) + u64 bytes_alloced; + u64 bytes_alloced_real; + u64 bytes_freed; +}; -#define allocator_dbg(alloctor, format, arg...) \ -do { \ - if (1) \ - pr_debug("gk20a_allocator (%s) %s: " format "\n",\ - alloctor->name, __func__, ##arg);\ -} while (0) +#define balloc_lock(a) mutex_lock(&(a)->lock) +#define balloc_unlock(a) mutex_unlock(&(a)->lock) -#else /* ALLOCATOR_DEBUG */ +#define balloc_get_order_list(a, order) (&(a)->buddy_list[(order)]) +#define balloc_order_to_len(a, order) ((1 << order) * (a)->blk_size) +#define balloc_base_shift(a, base) ((base) - (a)->start) +#define balloc_base_unshift(a, base) ((base) + (a)->start) -#define allocator_dbg(format, arg...) +int gk20a_allocator_init(struct gk20a_allocator *allocator, + const char *name, u64 base, u64 size, u64 order0); +int __gk20a_allocator_init(struct gk20a_allocator *allocator, + struct vm_gk20a *vm, const char *name, + u64 base, u64 size, u64 order0, + u64 max_order, u64 flags); +void gk20a_allocator_destroy(struct gk20a_allocator *allocator); -#endif /* ALLOCATOR_DEBUG */ +/* + * Normal alloc/free operations for the buddy allocator. + */ +u64 gk20a_balloc(struct gk20a_allocator *allocator, u64 len); +void gk20a_bfree(struct gk20a_allocator *allocator, u64 addr); + +/* + * Special interface to allocate a memory regions with a specific starting + * address. Yikes. + */ +u64 gk20a_balloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len); + +/* + * Debugfs init. + */ +void gk20a_alloc_debugfs_init(struct platform_device *pdev); + +#if defined(ALLOCATOR_DEBUG) +#define balloc_dbg(alloctor, format, arg...) 
\ + pr_info("%-25s %25s() " format, \ + alloctor->name, __func__, ##arg) +#else +#define balloc_dbg(allocator, format, arg...) +#endif #endif /* GK20A_ALLOCATOR_H */ diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index 02bea0a1..7cb386f0 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c @@ -89,9 +89,8 @@ static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) if (err) return err; - gk20a_allocator_init(&gr->comp_tags, "comptag", - 1, /* start */ - max_comptag_lines - 1); /* length*/ + __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag", + 1, max_comptag_lines - 1, 1, 10, 0); gr->comptags_per_cacheline = comptags_per_cacheline; gr->slices_per_ltc = slices_per_fbp / g->ltc_count; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 735c262a..a38db709 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -132,10 +132,8 @@ static void gk20a_mm_delete_priv(void *_priv) if (priv->comptags.lines) { BUG_ON(!priv->comptag_allocator); - priv->comptag_allocator->free(priv->comptag_allocator, - priv->comptags.offset, - priv->comptags.allocated_lines, - 1); + gk20a_bfree(priv->comptag_allocator, + priv->comptags.real_offset); } /* Free buffer states */ @@ -226,10 +224,9 @@ static int gk20a_alloc_comptags(struct gk20a *g, u32 *ctag_map_win_ctagline) { struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); - u32 offset = 0; - int err; u32 ctaglines_to_allocate; - u32 ctagline_align; + u32 ctagline_align = 1; + u32 offset; const u32 aggregate_cacheline_sz = g->gr.cacheline_size * g->gr.slices_per_ltc * g->ltc_count; @@ -243,7 +240,6 @@ static int gk20a_alloc_comptags(struct gk20a *g, if (!user_mappable) { ctaglines_to_allocate = lines; - ctagline_align = 1; } else { /* Unfortunately, we cannot use allocation alignment * here, since compbits per cacheline is not always a @@ -275,82 +271,26 @@ static int gk20a_alloc_comptags(struct gk20a *g, if (ctaglines_to_allocate < lines) return -EINVAL; /* integer overflow */ + pr_info("user-mapped CTAGS: %u\n", ctaglines_to_allocate); } /* store the allocator so we can use it when we free the ctags */ priv->comptag_allocator = allocator; - err = allocator->alloc(allocator, &offset, - ctaglines_to_allocate, 1); - if (!err) { - const u32 alignment_lines = - DIV_ROUND_UP(offset, ctagline_align) * ctagline_align - - offset; - - /* prune the preceding ctaglines that were allocated - for alignment */ - if (alignment_lines) { - /* free alignment lines */ - int tmp= - allocator->free(allocator, offset, - alignment_lines, - 1); - WARN_ON(tmp); - - offset += alignment_lines; - ctaglines_to_allocate -= alignment_lines; - } + offset = gk20a_balloc(allocator, ctaglines_to_allocate); + if (!offset) + return -ENOMEM; - /* check if we can prune the trailing, too */ - if (user_mappable) - { - u32 needed_cachelines = - DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline); - - u32 first_unneeded_cacheline = - DIV_ROUND_UP(round_up(needed_cachelines * - aggregate_cacheline_sz, - small_pgsz), - aggregate_cacheline_sz); - u32 needed_ctaglines = - first_unneeded_cacheline * - g->gr.comptags_per_cacheline; - - u64 win_size; - - if (needed_ctaglines < ctaglines_to_allocate) { - /* free alignment lines */ - int tmp= - allocator->free( - allocator, - offset + needed_ctaglines, - (ctaglines_to_allocate - - needed_ctaglines), - 1); - WARN_ON(tmp); - - ctaglines_to_allocate = needed_ctaglines; - } + 
priv->comptags.lines = lines; + priv->comptags.real_offset = offset; - *ctag_map_win_ctagline = offset; - win_size = - DIV_ROUND_UP(lines, - g->gr.comptags_per_cacheline) * - aggregate_cacheline_sz; + if (user_mappable) + offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align; - *ctag_map_win_size = round_up(win_size, small_pgsz); - } + priv->comptags.offset = offset; - priv->comptags.offset = offset; - priv->comptags.lines = lines; - priv->comptags.allocated_lines = ctaglines_to_allocate; - priv->comptags.user_mappable = user_mappable; - } - return err; + return 0; } - - - static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) { gk20a_dbg_fn(""); @@ -901,14 +841,12 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset) } u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, - u64 size, - enum gmmu_pgsz_gk20a gmmu_pgsz_idx) + u64 size, + enum gmmu_pgsz_gk20a gmmu_pgsz_idx) { struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; - int err; u64 offset; - u32 start_page_nr = 0, num_pages; u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) { @@ -924,28 +862,19 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, } - /* be certain we round up to gmmu_page_size if needed */ - /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */ + /* Be certain we round up to gmmu_page_size if needed */ size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1); - gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); - /* The vma allocator represents page accounting. */ - num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]); - - err = vma->alloc(vma, &start_page_nr, num_pages, 1); - - if (err) { + offset = gk20a_balloc(vma, size); + if (!offset) { gk20a_err(dev_from_vm(vm), - "%s oom: sz=0x%llx", vma->name, size); + "%s oom: sz=0x%llx", vma->name, size); return 0; } - offset = (u64)start_page_nr << - ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]); gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset); - return offset; } @@ -954,25 +883,12 @@ int gk20a_vm_free_va(struct vm_gk20a *vm, enum gmmu_pgsz_gk20a pgsz_idx) { struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; - u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; - u32 page_shift = ilog2(page_size); - u32 start_page_nr, num_pages; - int err; gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", vma->name, offset, size); + gk20a_bfree(vma, offset); - start_page_nr = (u32)(offset >> page_shift); - num_pages = (u32)((size + page_size - 1) >> page_shift); - - err = vma->free(vma, start_page_nr, num_pages, 1); - if (err) { - gk20a_err(dev_from_vm(vm), - "not found: offset=0x%llx, sz=0x%llx", - offset, size); - } - - return err; + return 0; } static int insert_mapped_buffer(struct rb_root *root, @@ -1169,7 +1085,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm, if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) { gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx", - map_offset); + map_offset); return -EINVAL; } @@ -2613,7 +2529,6 @@ int gk20a_init_vm(struct mm_gk20a *mm, char *name) { int err, i; - u32 num_small_pages, num_large_pages, low_hole_pages; char alloc_name[32]; u64 small_vma_size, large_vma_size; u32 pde_lo, pde_hi; @@ -2674,34 +2589,31 @@ int gk20a_init_vm(struct mm_gk20a *mm, large_vma_size = vm->va_limit - small_vma_size; } - num_small_pages = (u32)(small_vma_size >> - ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); - - /* num_pages above is without regard to the low-side hole. 
*/ - low_hole_pages = (vm->va_start >> - ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); - snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, vm->gmmu_page_sizes[gmmu_page_size_small]>>10); - err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], - alloc_name, - low_hole_pages, /*start*/ - num_small_pages - low_hole_pages);/* length*/ + err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], + vm, alloc_name, + vm->va_start, + small_vma_size - vm->va_start, + SZ_4K, + GPU_BALLOC_MAX_ORDER, + GPU_BALLOC_GVA_SPACE); if (err) goto clean_up_ptes; if (big_pages) { - u32 start = (u32)(small_vma_size >> - ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); - num_large_pages = (u32)(large_vma_size >> - ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); - snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10); - err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], - alloc_name, - start, /* start */ - num_large_pages); /* length */ + /* + * Big page VMA starts at the end of the small page VMA. + */ + err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], + vm, alloc_name, + small_vma_size, + large_vma_size, + big_page_size, + GPU_BALLOC_MAX_ORDER, + GPU_BALLOC_GVA_SPACE); if (err) goto clean_up_small_allocator; } @@ -2782,9 +2694,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share) int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, struct nvgpu_as_alloc_space_args *args) -{ int err = -ENOMEM; +{ + int err = -ENOMEM; int pgsz_idx = gmmu_page_size_small; - u32 start_page_nr; struct gk20a_allocator *vma; struct vm_gk20a *vm = as_share->vm; struct gk20a *g = vm->mm->g; @@ -2815,21 +2727,19 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, goto clean_up; } - start_page_nr = 0; + vma = &vm->vma[pgsz_idx]; if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) - start_page_nr = (u32)(args->o_a.offset >> - ilog2(vm->gmmu_page_sizes[pgsz_idx])); + vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset, + (u64)args->pages * + (u64)args->page_size); + else + vaddr_start = gk20a_balloc(vma, args->pages * args->page_size); - vma = &vm->vma[pgsz_idx]; - err = vma->alloc(vma, &start_page_nr, args->pages, 1); - if (err) { + if (!vaddr_start) { kfree(va_node); goto clean_up; } - vaddr_start = (u64)start_page_nr << - ilog2(vm->gmmu_page_sizes[pgsz_idx]); - va_node->vaddr_start = vaddr_start; va_node->size = (u64)args->page_size * (u64)args->pages; va_node->pgsz_idx = pgsz_idx; @@ -2853,7 +2763,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, true); if (!map_offset) { mutex_unlock(&vm->update_gmmu_lock); - vma->free(vma, start_page_nr, args->pages, 1); + gk20a_bfree(vma, vaddr_start); kfree(va_node); goto clean_up; } @@ -2865,6 +2775,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, mutex_unlock(&vm->update_gmmu_lock); args->o_a.offset = vaddr_start; + err = 0; clean_up: return err; @@ -2875,7 +2786,6 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, { int err = -ENOMEM; int pgsz_idx; - u32 start_page_nr; struct gk20a_allocator *vma; struct vm_gk20a *vm = as_share->vm; struct vm_reserved_va_node *va_node; @@ -2888,14 +2798,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ? 
gmmu_page_size_big : gmmu_page_size_small; - start_page_nr = (u32)(args->offset >> - ilog2(vm->gmmu_page_sizes[pgsz_idx])); - vma = &vm->vma[pgsz_idx]; - err = vma->free(vma, start_page_nr, args->pages, 1); - - if (err) - goto clean_up; + gk20a_bfree(vma, args->offset); mutex_lock(&vm->update_gmmu_lock); va_node = addr_to_reservation(vm, args->offset); @@ -2925,8 +2829,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, kfree(va_node); } mutex_unlock(&vm->update_gmmu_lock); + err = 0; -clean_up: return err; } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index c1f8a4f0..82003cd0 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -131,6 +131,7 @@ enum gmmu_pgsz_gk20a { }; struct gk20a_comptags { + u32 real_offset; u32 offset; u32 lines; u32 allocated_lines; diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 2456c784..11322293 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -2816,7 +2816,6 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu) struct pmu_payload payload; u32 seq; u32 data; - int err = 0; gk20a_dbg_fn(""); @@ -2867,12 +2866,11 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu) gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); if (!pmu->sample_buffer) - err = pmu->dmem.alloc(&pmu->dmem, - &pmu->sample_buffer, 2 * sizeof(u16), - PMU_DMEM_ALLOC_ALIGNMENT); - if (err) { + pmu->sample_buffer = gk20a_balloc(&pmu->dmem, + 2 * sizeof(u16)); + if (!pmu->sample_buffer) { gk20a_err(dev_from_gk20a(g), - "failed to allocate perfmon sample buffer"); + "failed to allocate perfmon sample buffer"); return -ENOMEM; } @@ -2970,15 +2968,17 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu, for (i = 0; i < PMU_QUEUE_COUNT; i++) pmu_queue_init(pmu, i, init); - if (!pmu->dmem.alloc) { - /*Align start and end addresses*/ + if (!pmu->dmem.init) { + /* Align start and end addresses */ u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init), - PMU_DMEM_ALLOC_ALIGNMENT); + PMU_DMEM_ALLOC_ALIGNMENT); u32 end = (pv->get_pmu_init_msg_pmu_sw_mg_off(init) + - pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & + pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & ~(PMU_DMEM_ALLOC_ALIGNMENT - 1); u32 size = end - start; - gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", start, size); + __gk20a_allocator_init(&pmu->dmem, NULL, "gk20a_pmu_dmem", + start, size, + PMU_DMEM_ALLOC_ALIGNMENT, 4, 0); } pmu->pmu_ready = true; @@ -3115,20 +3115,14 @@ static int pmu_response_handle(struct pmu_gk20a *pmu, seq->callback = NULL; if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_in_a_ptr(seq)) != 0) - pmu->dmem.free(&pmu->dmem, + gk20a_bfree(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, - pv->get_pmu_seq_in_a_ptr(seq)), - pv->pmu_allocation_get_dmem_size(pmu, - pv->get_pmu_seq_in_a_ptr(seq)), - PMU_DMEM_ALLOC_ALIGNMENT); + pv->get_pmu_seq_in_a_ptr(seq))); if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)) != 0) - pmu->dmem.free(&pmu->dmem, + gk20a_bfree(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, - pv->get_pmu_seq_out_a_ptr(seq)), - pv->pmu_allocation_get_dmem_size(pmu, - pv->get_pmu_seq_out_a_ptr(seq)), - PMU_DMEM_ALLOC_ALIGNMENT); + pv->get_pmu_seq_out_a_ptr(seq))); if (seq->callback) seq->callback(g, msg, seq->cb_params, seq->desc, ret); @@ -3769,11 +3763,10 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, pv->pmu_allocation_set_dmem_size(pmu, in, (u16)max(payload->in.size, 
payload->out.size)); - err = pmu->dmem.alloc(&pmu->dmem, - pv->pmu_allocation_get_dmem_offset_addr(pmu, in), - pv->pmu_allocation_get_dmem_size(pmu, in), - PMU_DMEM_ALLOC_ALIGNMENT); - if (err) + *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = + gk20a_balloc(&pmu->dmem, + pv->pmu_allocation_get_dmem_size(pmu, in)); + if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in))) goto clean_up; pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu, @@ -3794,11 +3787,12 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, (u16)payload->out.size); if (payload->out.buf != payload->in.buf) { - err = pmu->dmem.alloc(&pmu->dmem, - pv->pmu_allocation_get_dmem_offset_addr(pmu, out), - pv->pmu_allocation_get_dmem_size(pmu, out), - PMU_DMEM_ALLOC_ALIGNMENT); - if (err) + + *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) = + gk20a_balloc(&pmu->dmem, + pv->pmu_allocation_get_dmem_size(pmu, out)); + if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, + out))) goto clean_up; } else { BUG_ON(in == NULL); @@ -3826,15 +3820,11 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, clean_up: gk20a_dbg_fn("fail"); if (in) - pmu->dmem.free(&pmu->dmem, - pv->pmu_allocation_get_dmem_offset(pmu, in), - pv->pmu_allocation_get_dmem_size(pmu, in), - PMU_DMEM_ALLOC_ALIGNMENT); + gk20a_bfree(&pmu->dmem, + pv->pmu_allocation_get_dmem_offset(pmu, in)); if (out) - pmu->dmem.free(&pmu->dmem, - pv->pmu_allocation_get_dmem_offset(pmu, out), - pv->pmu_allocation_get_dmem_size(pmu, out), - PMU_DMEM_ALLOC_ALIGNMENT); + gk20a_bfree(&pmu->dmem, + pv->pmu_allocation_get_dmem_offset(pmu, out)); pmu_seq_release(pmu, seq); return err; diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index 73530b22..f29c810e 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h @@ -3,7 +3,7 @@ * * GK20A PMU (aka. gPMU outside gk20a context) * - * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -466,7 +466,7 @@ struct pmu_ucode_desc { #define PMU_UNIT_ID_IS_VALID(id) \ (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) -#define PMU_DMEM_ALLOC_ALIGNMENT (32) +#define PMU_DMEM_ALLOC_ALIGNMENT (4) #define PMU_DMEM_ALIGNMENT (4) #define PMU_CMD_FLAGS_PMU_MASK (0xF0) diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c index 04f61c58..053550f6 100644 --- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c @@ -3,7 +3,7 @@ * * GK20A Semaphores * - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -44,8 +44,10 @@ struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct device *d, if (gk20a_get_sgtable(d, &p->sgt, p->cpu_va, p->iova, p->size)) goto clean_up; - if (gk20a_allocator_init(&p->alloc, unique_name, 0, - p->size)) + /* Sacrifice one semaphore in the name of returning error codes. 
*/ + if (gk20a_allocator_init(&p->alloc, unique_name, + SEMAPHORE_SIZE, p->size - SEMAPHORE_SIZE, + SEMAPHORE_SIZE)) goto clean_up; gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va, @@ -163,8 +165,8 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool) if (!s) return NULL; - if (pool->alloc.alloc(&pool->alloc, &s->offset, SEMAPHORE_SIZE, - SEMAPHORE_SIZE)) { + s->offset = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE); + if (!s->offset) { gk20a_err(pool->dev, "failed to allocate semaphore"); kfree(s); return NULL; @@ -186,8 +188,7 @@ static void gk20a_semaphore_free(struct kref *ref) struct gk20a_semaphore *s = container_of(ref, struct gk20a_semaphore, ref); - s->pool->alloc.free(&s->pool->alloc, s->offset, SEMAPHORE_SIZE, - SEMAPHORE_SIZE); + gk20a_bfree(&s->pool->alloc, s->offset); gk20a_semaphore_pool_put(s->pool); kfree(s); } diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 9d16dba7..bc904ef3 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c @@ -90,9 +90,8 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) if (err) return err; - gk20a_allocator_init(&gr->comp_tags, "comptag", - 1, /* start */ - max_comptag_lines - 1); /* length*/ + __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag", + 1, max_comptag_lines - 1, 1, 10, 0); gr->comptags_per_cacheline = comptags_per_cacheline; gr->slices_per_ltc = slices_per_ltc; diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c index 1beac216..211e34b5 100644 --- a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c @@ -41,9 +41,8 @@ static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) if (max_comptag_lines < 2) return -ENXIO; - gk20a_allocator_init(&gr->comp_tags, "comptag", - 1, /* start */ - max_comptag_lines - 1); /* length*/ + __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag", + 1, max_comptag_lines - 1, 1, 10, 0); /* length*/ return 0; } diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 94e4602f..855aac0d 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -243,11 +243,9 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, struct tegra_vgpu_as_share_params *p = &msg.params.as_share; struct mm_gk20a *mm = &g->mm; struct vm_gk20a *vm; - u32 num_small_pages, num_large_pages, low_hole_pages; u64 small_vma_size, large_vma_size; char name[32]; int err, i; - u32 start; /* note: keep the page sizes sorted lowest to highest here */ u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { @@ -294,33 +292,27 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, small_vma_size = (u64)16 << 30; large_vma_size = vm->va_limit - small_vma_size; - num_small_pages = (u32)(small_vma_size >> - ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); - - /* num_pages above is without regard to the low-side hole. 
*/ - low_hole_pages = (vm->va_start >> - ilog2(vm->gmmu_page_sizes[gmmu_page_size_small])); - snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, gmmu_page_sizes[gmmu_page_size_small]>>10); - err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small], - name, - low_hole_pages, /*start*/ - num_small_pages - low_hole_pages);/* length*/ + err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], + vm, name, + vm->va_start, + small_vma_size - vm->va_start, + SZ_4K, + GPU_BALLOC_MAX_ORDER, + GPU_BALLOC_GVA_SPACE); if (err) goto clean_up_share; - start = (u32)(small_vma_size >> - ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); - num_large_pages = (u32)(large_vma_size >> - ilog2(vm->gmmu_page_sizes[gmmu_page_size_big])); - snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, gmmu_page_sizes[gmmu_page_size_big]>>10); - err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big], - name, - start, /* start */ - num_large_pages); /* length */ + err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], + vm, name, + small_vma_size, + large_vma_size, + big_page_size, + GPU_BALLOC_MAX_ORDER, + GPU_BALLOC_GVA_SPACE); if (err) goto clean_up_small_allocator; -- cgit v1.2.2
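
An illustrative sketch (not part of the commit): the heart of the new buddy allocator is how it trims the managed range to blk_size boundaries and maps an allocation length to a buddy order, per balloc_allocator_align(), balloc_get_order() and balloc_compute_max_order() in the patch above. The stand-alone user-space C model below mirrors that arithmetic only; the ulog2() helper and the example base/length/blk_size values are assumptions standing in for the kernel's ilog2()/fls()/ALIGN() and a real VA space, and it does not represent the driver API beyond what the patch itself shows.

/*
 * Minimal user-space model of the buddy allocator's sizing math
 * (range alignment, block count, max order, length -> order).
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_ORDER 31ULL	/* cf. GPU_BALLOC_MAX_ORDER in the patch */

/* Integer floor(log2(v)) for non-zero v; stand-in for the kernel's ilog2(). */
static uint64_t ulog2(uint64_t v)
{
	uint64_t r = 0;

	while (v >>= 1)
		r++;
	return r;
}

/* Smallest order whose block covers 'len'; mirrors balloc_get_order(). */
static uint64_t order_for_len(uint64_t len, uint64_t blk_shift)
{
	if (len == 0)
		return 0;
	len--;
	len >>= blk_shift;
	return len ? ulog2(len) + 1 : 0;	/* equivalent to fls(len) */
}

int main(void)
{
	/* Example values (assumed): 1 GiB space at 0x1000 with 4 KiB blocks. */
	uint64_t base = 0x1000, length = 1ULL << 30, blk_size = 0x1000;
	uint64_t blk_shift = ulog2(blk_size);

	/* Trim to blk_size boundaries, as balloc_allocator_align() does. */
	uint64_t start = (base + blk_size - 1) & ~(blk_size - 1);
	uint64_t end = (base + length) & ~(blk_size - 1);
	uint64_t blks = (end - start) >> blk_shift;

	/* Max order: largest single block that fits, capped at MAX_ORDER. */
	uint64_t max_order = ulog2(blks);

	if (max_order > MAX_ORDER)
		max_order = MAX_ORDER;

	printf("start=0x%llx end=0x%llx blks=%llu max_order=%llu\n",
	       (unsigned long long)start, (unsigned long long)end,
	       (unsigned long long)blks, (unsigned long long)max_order);
	printf("order for 64 KiB alloc: %llu\n",
	       (unsigned long long)order_for_len(64 * 1024, blk_shift));
	return 0;
}

Built with any C compiler, this prints the aligned start/end of the range, the block count, the derived max order, and the order chosen for a 64 KiB request (order 4 with 4 KiB blocks), which is the same order balloc_get_order() would hand to __balloc_do_alloc() before the buddy lists are searched.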