From 6df3992b60959d32c7113cb77e131a2547174f3a Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Tue, 20 Dec 2016 13:55:48 -0800 Subject: gpu: nvgpu: Move allocators to common/mm/ Move the GPU allocators to common/mm/ since the allocators are common code across all GPUs. Also rename the allocator code to move away from gk20a_ prefixed structs and functions. This caused one issue with the nvgpu_alloc() and nvgpu_free() functions. There was a function for allocating either with kmalloc() or vmalloc() depending on the size of the allocation. Those have now been renamed to nvgpu_kalloc() and nvgpu_kfree(). Bug 1799159 Change-Id: Iddda92c013612bcb209847084ec85b8953002fa5 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1274400 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile.nvgpu | 10 +- drivers/gpu/nvgpu/common/mm/bitmap_allocator.c | 443 +++++++ .../gpu/nvgpu/common/mm/bitmap_allocator_priv.h | 70 ++ drivers/gpu/nvgpu/common/mm/buddy_allocator.c | 1329 ++++++++++++++++++++ drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h | 192 +++ drivers/gpu/nvgpu/common/mm/lockless_allocator.c | 207 +++ .../gpu/nvgpu/common/mm/lockless_allocator_priv.h | 121 ++ drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c | 212 ++++ drivers/gpu/nvgpu/common/mm/page_allocator.c | 937 ++++++++++++++ drivers/gpu/nvgpu/gk20a/as_gk20a.c | 10 +- drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h | 70 -- drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h | 192 --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 12 +- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 4 +- drivers/gpu/nvgpu/gk20a/debug_gk20a.c | 2 +- drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 16 +- drivers/gpu/nvgpu/gk20a/fence_gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/gk20a.c | 3 +- drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | 211 ---- drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | 302 ----- drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c | 442 ------- drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c | 1327 ------------------- drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c | 206 --- drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c | 936 -------------- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 6 +- drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h | 121 -- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 114 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 23 +- drivers/gpu/nvgpu/gk20a/page_allocator_priv.h | 164 --- drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 26 +- drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h | 3 +- drivers/gpu/nvgpu/include/nvgpu/allocator.h | 302 +++++ drivers/gpu/nvgpu/include/nvgpu/page_allocator.h | 164 +++ drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 20 +- 36 files changed, 4106 insertions(+), 4097 deletions(-) create mode 100644 drivers/gpu/nvgpu/common/mm/bitmap_allocator.c create mode 100644 drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h create mode 100644 drivers/gpu/nvgpu/common/mm/buddy_allocator.c create mode 100644 drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h create mode 100644 drivers/gpu/nvgpu/common/mm/lockless_allocator.c create mode 100644 drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h create mode 100644 drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c create mode 100644 drivers/gpu/nvgpu/common/mm/page_allocator.c delete mode 100644 drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h delete mode 100644 drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h delete mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_allocator.c delete mode 100644 
drivers/gpu/nvgpu/gk20a/gk20a_allocator.h delete mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c delete mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c delete mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c delete mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c delete mode 100644 drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h delete mode 100644 drivers/gpu/nvgpu/gk20a/page_allocator_priv.h create mode 100644 drivers/gpu/nvgpu/include/nvgpu/allocator.h create mode 100644 drivers/gpu/nvgpu/include/nvgpu/page_allocator.h diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index 93629eff..afce062b 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu @@ -23,6 +23,11 @@ obj-$(CONFIG_GK20A) := nvgpu.o nvgpu-y := \ common/linux/timers.o \ + common/mm/nvgpu_allocator.o \ + common/mm/bitmap_allocator.o \ + common/mm/buddy_allocator.o \ + common/mm/page_allocator.o \ + common/mm/lockless_allocator.o \ nvgpu_common.o \ gk20a/gk20a.o \ gk20a/sched_gk20a.o \ @@ -51,11 +56,6 @@ nvgpu-y := \ gk20a/fb_gk20a.o \ gk20a/hal.o \ gk20a/hal_gk20a.o \ - gk20a/gk20a_allocator.o \ - gk20a/gk20a_allocator_bitmap.o \ - gk20a/gk20a_allocator_buddy.o \ - gk20a/gk20a_allocator_page.o \ - gk20a/gk20a_allocator_lockless.o \ gk20a/cde_gk20a.o \ gk20a/platform_gk20a_generic.o \ gk20a/tsg_gk20a.o \ diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c new file mode 100644 index 00000000..6f267c85 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c @@ -0,0 +1,443 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include + +#include "bitmap_allocator_priv.h" + +static struct kmem_cache *meta_data_cache; /* slab cache for meta data. */ +static DEFINE_MUTEX(meta_data_cache_lock); + +static u64 nvgpu_bitmap_alloc_length(struct nvgpu_allocator *a) +{ + struct nvgpu_bitmap_allocator *ba = a->priv; + + return ba->length; +} + +static u64 nvgpu_bitmap_alloc_base(struct nvgpu_allocator *a) +{ + struct nvgpu_bitmap_allocator *ba = a->priv; + + return ba->base; +} + +static int nvgpu_bitmap_alloc_inited(struct nvgpu_allocator *a) +{ + struct nvgpu_bitmap_allocator *ba = a->priv; + int inited = ba->inited; + + rmb(); + return inited; +} + +static u64 nvgpu_bitmap_alloc_end(struct nvgpu_allocator *a) +{ + struct nvgpu_bitmap_allocator *ba = a->priv; + + return ba->base + ba->length; +} + +static u64 nvgpu_bitmap_alloc_fixed(struct nvgpu_allocator *__a, + u64 base, u64 len) +{ + struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a); + u64 blks, offs, ret; + + /* Compute the bit offset and make sure it's aligned to a block. 
*/ + offs = base >> a->blk_shift; + if (offs * a->blk_size != base) + return 0; + + offs -= a->bit_offs; + + blks = len >> a->blk_shift; + if (blks * a->blk_size != len) + blks++; + + alloc_lock(__a); + + /* Check if the space requested is already occupied. */ + ret = bitmap_find_next_zero_area(a->bitmap, a->num_bits, offs, blks, 0); + if (ret != offs) + goto fail; + + bitmap_set(a->bitmap, offs, blks); + + a->bytes_alloced += blks * a->blk_size; + a->nr_fixed_allocs++; + alloc_unlock(__a); + + alloc_dbg(__a, "Alloc-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n", + base, len, blks, blks); + return base; + +fail: + alloc_unlock(__a); + alloc_dbg(__a, "Alloc-fixed failed! (0x%llx)\n", base); + return 0; +} + +/* + * Two possibilities for this function: either we are freeing a fixed allocation + * or we are freeing a regular alloc but with GPU_ALLOC_NO_ALLOC_PAGE defined. + * + * Note: this function won't do much error checking. Thus you could really + * confuse the allocator if you misuse this function. + */ +static void nvgpu_bitmap_free_fixed(struct nvgpu_allocator *__a, + u64 base, u64 len) +{ + struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a); + u64 blks, offs; + + offs = base >> a->blk_shift; + if (WARN_ON(offs * a->blk_size != base)) + return; + + offs -= a->bit_offs; + + blks = len >> a->blk_shift; + if (blks * a->blk_size != len) + blks++; + + alloc_lock(__a); + bitmap_clear(a->bitmap, offs, blks); + a->bytes_freed += blks * a->blk_size; + alloc_unlock(__a); + + alloc_dbg(__a, "Free-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n", + base, len, blks, blks); +} + +/* + * Add the passed alloc to the tree of stored allocations. + */ +static void insert_alloc_metadata(struct nvgpu_bitmap_allocator *a, + struct nvgpu_bitmap_alloc *alloc) +{ + struct rb_node **new = &a->allocs.rb_node; + struct rb_node *parent = NULL; + struct nvgpu_bitmap_alloc *tmp; + + while (*new) { + tmp = container_of(*new, struct nvgpu_bitmap_alloc, + alloc_entry); + + parent = *new; + if (alloc->base < tmp->base) + new = &((*new)->rb_left); + else if (alloc->base > tmp->base) + new = &((*new)->rb_right); + else { + WARN_ON("Duplicate entries in RB alloc tree!\n"); + return; + } + } + + rb_link_node(&alloc->alloc_entry, parent, new); + rb_insert_color(&alloc->alloc_entry, &a->allocs); +} + +/* + * Find and remove meta-data from the outstanding allocations. + */ +static struct nvgpu_bitmap_alloc *find_alloc_metadata( + struct nvgpu_bitmap_allocator *a, u64 addr) +{ + struct rb_node *node = a->allocs.rb_node; + struct nvgpu_bitmap_alloc *alloc; + + while (node) { + alloc = container_of(node, struct nvgpu_bitmap_alloc, + alloc_entry); + + if (addr < alloc->base) + node = node->rb_left; + else if (addr > alloc->base) + node = node->rb_right; + else + break; + } + + if (!node) + return NULL; + + rb_erase(node, &a->allocs); + + return alloc; +} + +/* + * Tree of alloc meta data stores the address of the alloc not the bit offset. + */ +static int __nvgpu_bitmap_store_alloc(struct nvgpu_bitmap_allocator *a, + u64 addr, u64 len) +{ + struct nvgpu_bitmap_alloc *alloc = + kmem_cache_alloc(meta_data_cache, GFP_KERNEL); + + if (!alloc) + return -ENOMEM; + + alloc->base = addr; + alloc->length = len; + + insert_alloc_metadata(a, alloc); + + return 0; +} + +/* + * @len is in bytes. This routine will figure out the right number of bits to + * actually allocate. The return is the address in bytes as well. 
+ */ +static u64 nvgpu_bitmap_alloc(struct nvgpu_allocator *__a, u64 len) +{ + u64 blks, addr; + unsigned long offs, adjusted_offs, limit; + struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a); + + blks = len >> a->blk_shift; + + if (blks * a->blk_size != len) + blks++; + + alloc_lock(__a); + + /* + * First look from next_blk and onwards... + */ + offs = bitmap_find_next_zero_area(a->bitmap, a->num_bits, + a->next_blk, blks, 0); + if (offs >= a->num_bits) { + /* + * If that didn't work try the remaining area. Since there can + * be available space that spans across a->next_blk we need to + * search up to the first set bit after that. + */ + limit = find_next_bit(a->bitmap, a->num_bits, a->next_blk); + offs = bitmap_find_next_zero_area(a->bitmap, limit, + 0, blks, 0); + if (offs >= a->next_blk) + goto fail; + } + + bitmap_set(a->bitmap, offs, blks); + a->next_blk = offs + blks; + + adjusted_offs = offs + a->bit_offs; + addr = ((u64)adjusted_offs) * a->blk_size; + + /* + * Only do meta-data storage if we are allowed to allocate storage for + * that meta-data. The issue with using kmalloc() and friends is that + * in latency and success critical paths an alloc_page() call can either + * sleep for potentially a long time or, assuming GFP_ATOMIC, fail. + * Since we might not want either of these possibilities assume that the + * caller will keep what data it needs around to successfully free this + * allocation. + */ + if (!(a->flags & GPU_ALLOC_NO_ALLOC_PAGE) && + __nvgpu_bitmap_store_alloc(a, addr, blks * a->blk_size)) + goto fail_reset_bitmap; + + alloc_dbg(__a, "Alloc 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n", + addr, len, blks, blks); + + a->nr_allocs++; + a->bytes_alloced += (blks * a->blk_size); + alloc_unlock(__a); + + return addr; + +fail_reset_bitmap: + bitmap_clear(a->bitmap, offs, blks); +fail: + a->next_blk = 0; + alloc_unlock(__a); + alloc_dbg(__a, "Alloc failed!\n"); + return 0; +} + +static void nvgpu_bitmap_free(struct nvgpu_allocator *__a, u64 addr) +{ + struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a); + struct nvgpu_bitmap_alloc *alloc = NULL; + u64 offs, adjusted_offs, blks; + + alloc_lock(__a); + + if (a->flags & GPU_ALLOC_NO_ALLOC_PAGE) { + WARN(1, "Using wrong free for NO_ALLOC_PAGE bitmap allocator"); + goto done; + } + + alloc = find_alloc_metadata(a, addr); + if (!alloc) + goto done; + + /* + * Address comes from adjusted offset (i.e the bit offset with + * a->bit_offs added. So start with that and then work out the real + * offs into the bitmap. + */ + adjusted_offs = addr >> a->blk_shift; + offs = adjusted_offs - a->bit_offs; + blks = alloc->length >> a->blk_shift; + + bitmap_clear(a->bitmap, offs, blks); + alloc_dbg(__a, "Free 0x%-10llx\n", addr); + + a->bytes_freed += alloc->length; + +done: + kfree(alloc); + alloc_unlock(__a); +} + +static void nvgpu_bitmap_alloc_destroy(struct nvgpu_allocator *__a) +{ + struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a); + struct nvgpu_bitmap_alloc *alloc; + struct rb_node *node; + + /* + * Kill any outstanding allocations. 
+ */ + while ((node = rb_first(&a->allocs)) != NULL) { + alloc = container_of(node, struct nvgpu_bitmap_alloc, + alloc_entry); + + rb_erase(node, &a->allocs); + kfree(alloc); + } + + kfree(a->bitmap); + kfree(a); +} + +static void nvgpu_bitmap_print_stats(struct nvgpu_allocator *__a, + struct seq_file *s, int lock) +{ + struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a); + + __alloc_pstat(s, __a, "Bitmap allocator params:\n"); + __alloc_pstat(s, __a, " start = 0x%llx\n", a->base); + __alloc_pstat(s, __a, " end = 0x%llx\n", a->base + a->length); + __alloc_pstat(s, __a, " blks = 0x%llx\n", a->num_bits); + + /* Actual stats. */ + __alloc_pstat(s, __a, "Stats:\n"); + __alloc_pstat(s, __a, " Number allocs = 0x%llx\n", a->nr_allocs); + __alloc_pstat(s, __a, " Number fixed = 0x%llx\n", a->nr_fixed_allocs); + __alloc_pstat(s, __a, " Bytes alloced = 0x%llx\n", a->bytes_alloced); + __alloc_pstat(s, __a, " Bytes freed = 0x%llx\n", a->bytes_freed); + __alloc_pstat(s, __a, " Outstanding = 0x%llx\n", + a->bytes_alloced - a->bytes_freed); +} + +static const struct nvgpu_allocator_ops bitmap_ops = { + .alloc = nvgpu_bitmap_alloc, + .free = nvgpu_bitmap_free, + + .alloc_fixed = nvgpu_bitmap_alloc_fixed, + .free_fixed = nvgpu_bitmap_free_fixed, + + .base = nvgpu_bitmap_alloc_base, + .length = nvgpu_bitmap_alloc_length, + .end = nvgpu_bitmap_alloc_end, + .inited = nvgpu_bitmap_alloc_inited, + + .fini = nvgpu_bitmap_alloc_destroy, + + .print_stats = nvgpu_bitmap_print_stats, +}; + + +int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, + const char *name, u64 base, u64 length, + u64 blk_size, u64 flags) +{ + int err; + struct nvgpu_bitmap_allocator *a; + + mutex_lock(&meta_data_cache_lock); + if (!meta_data_cache) + meta_data_cache = KMEM_CACHE(nvgpu_bitmap_alloc, 0); + mutex_unlock(&meta_data_cache_lock); + + if (!meta_data_cache) + return -ENOMEM; + + if (WARN_ON(blk_size & (blk_size - 1))) + return -EINVAL; + + /* + * blk_size must be a power-of-2; base length also need to be aligned + * to blk_size. + */ + if (blk_size & (blk_size - 1) || + base & (blk_size - 1) || length & (blk_size - 1)) + return -EINVAL; + + if (base == 0) { + base = blk_size; + length -= blk_size; + } + + a = kzalloc(sizeof(struct nvgpu_bitmap_allocator), GFP_KERNEL); + if (!a) + return -ENOMEM; + + err = __nvgpu_alloc_common_init(__a, name, a, false, &bitmap_ops); + if (err) + goto fail; + + a->base = base; + a->length = length; + a->blk_size = blk_size; + a->blk_shift = __ffs(a->blk_size); + a->num_bits = length >> a->blk_shift; + a->bit_offs = a->base >> a->blk_shift; + a->flags = flags; + + a->bitmap = kcalloc(BITS_TO_LONGS(a->num_bits), sizeof(*a->bitmap), + GFP_KERNEL); + if (!a->bitmap) + goto fail; + + wmb(); + a->inited = true; + + nvgpu_init_alloc_debug(g, __a); + alloc_dbg(__a, "New allocator: type bitmap\n"); + alloc_dbg(__a, " base 0x%llx\n", a->base); + alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs); + alloc_dbg(__a, " size 0x%llx\n", a->length); + alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); + alloc_dbg(__a, " flags 0x%llx\n", a->flags); + + return 0; + +fail: + kfree(a); + return err; +} diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h new file mode 100644 index 00000000..9802b9db --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef BITMAP_ALLOCATOR_PRIV_H +#define BITMAP_ALLOCATOR_PRIV_H + +#include + +struct nvgpu_allocator; + +struct nvgpu_bitmap_allocator { + struct nvgpu_allocator *owner; + + u64 base; /* Base address of the space. */ + u64 length; /* Length of the space. */ + u64 blk_size; /* Size that corresponds to 1 bit. */ + u64 blk_shift; /* Bit shift to divide by blk_size. */ + u64 num_bits; /* Number of allocatable bits. */ + u64 bit_offs; /* Offset of bitmap. */ + + /* + * Optimization for making repeated allocations faster. Keep track of + * the next bit after the most recent allocation. This is where the next + * search will start from. This should make allocation faster in cases + * where lots of allocations get made one after another. It shouldn't + * have a negative impact on the case where the allocator is fragmented. + */ + u64 next_blk; + + unsigned long *bitmap; /* The actual bitmap! */ + struct rb_root allocs; /* Tree of outstanding allocations. */ + + u64 flags; + + bool inited; + + /* Statistics */ + u64 nr_allocs; + u64 nr_fixed_allocs; + u64 bytes_alloced; + u64 bytes_freed; +}; + +struct nvgpu_bitmap_alloc { + u64 base; + u64 length; + struct rb_node alloc_entry; /* RB tree of allocations. */ +}; + +static inline struct nvgpu_bitmap_allocator *bitmap_allocator( + struct nvgpu_allocator *a) +{ + return (struct nvgpu_bitmap_allocator *)(a)->priv; +} + + +#endif diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c new file mode 100644 index 00000000..39a53801 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c @@ -0,0 +1,1329 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include + +#include "gk20a/mm_gk20a.h" +#include "gk20a/platform_gk20a.h" + +#include "buddy_allocator_priv.h" + +static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ + +/* Some other buddy allocator functions. */ +static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a, + u64 addr); +static void balloc_coalesce(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b); +static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a, + struct nvgpu_fixed_alloc *falloc); + +/* + * This function is not present in older kernel's list.h code. 
+ */ +#ifndef list_last_entry +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) +#endif + +/* + * GPU buddy allocator for various address spaces. + * + * Current limitations: + * o A fixed allocation could potentially be made that borders PDEs with + * different PTE sizes. This would require that fixed buffer to have + * different sized PTEs for different parts of the allocation. Probably + * best to just require PDE alignment for fixed address allocs. + * + * o It is currently possible to make an allocator that has a buddy alignment + * out of sync with the PDE block size alignment. A simple example is a + * 32GB address space starting at byte 1. Every buddy is shifted off by 1 + * which means each buddy corresponf to more than one actual GPU page. The + * best way to fix this is probably just require PDE blocksize alignment + * for the start of the address space. At the moment all allocators are + * easily PDE aligned so this hasn't been a problem. + */ + +/* + * Pick a suitable maximum order for this allocator. + * + * Hueristic: Just guessing that the best max order is the largest single + * block that will fit in the address space. + */ +static void balloc_compute_max_order(struct nvgpu_buddy_allocator *a) +{ + u64 true_max_order = ilog2(a->blks); + + if (a->max_order == 0) { + a->max_order = true_max_order; + return; + } + + if (a->max_order > true_max_order) + a->max_order = true_max_order; + if (a->max_order > GPU_BALLOC_MAX_ORDER) + a->max_order = GPU_BALLOC_MAX_ORDER; +} + +/* + * Since we can only allocate in chucks of a->blk_size we need to trim off + * any excess data that is not aligned to a->blk_size. + */ +static void balloc_allocator_align(struct nvgpu_buddy_allocator *a) +{ + a->start = ALIGN(a->base, a->blk_size); + WARN_ON(a->start != a->base); + a->end = (a->base + a->length) & ~(a->blk_size - 1); + a->count = a->end - a->start; + a->blks = a->count >> a->blk_shift; +} + +/* + * Pass NULL for parent if you want a top level buddy. + */ +static struct nvgpu_buddy *balloc_new_buddy(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *parent, + u64 start, u64 order) +{ + struct nvgpu_buddy *new_buddy; + + new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL); + if (!new_buddy) + return NULL; + + memset(new_buddy, 0, sizeof(struct nvgpu_buddy)); + + new_buddy->parent = parent; + new_buddy->start = start; + new_buddy->order = order; + new_buddy->end = start + (1 << order) * a->blk_size; + new_buddy->pte_size = BALLOC_PTE_SIZE_ANY; + + return new_buddy; +} + +static void __balloc_buddy_list_add(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b, + struct list_head *list) +{ + if (buddy_is_in_list(b)) { + alloc_dbg(balloc_owner(a), + "Oops: adding added buddy (%llu:0x%llx)\n", + b->order, b->start); + BUG(); + } + + /* + * Add big PTE blocks to the tail, small to the head for GVA spaces. + * This lets the code that checks if there are available blocks check + * without cycling through the entire list. 
+ */ + if (a->flags & GPU_ALLOC_GVA_SPACE && + b->pte_size == gmmu_page_size_big) + list_add_tail(&b->buddy_entry, list); + else + list_add(&b->buddy_entry, list); + + buddy_set_in_list(b); +} + +static void __balloc_buddy_list_rem(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b) +{ + if (!buddy_is_in_list(b)) { + alloc_dbg(balloc_owner(a), + "Oops: removing removed buddy (%llu:0x%llx)\n", + b->order, b->start); + BUG(); + } + + list_del_init(&b->buddy_entry); + buddy_clr_in_list(b); +} + +/* + * Add a buddy to one of the buddy lists and deal with the necessary + * book keeping. Adds the buddy to the list specified by the buddy's order. + */ +static void balloc_blist_add(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b) +{ + __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); + a->buddy_list_len[b->order]++; +} + +static void balloc_blist_rem(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b) +{ + __balloc_buddy_list_rem(a, b); + a->buddy_list_len[b->order]--; +} + +static u64 balloc_get_order(struct nvgpu_buddy_allocator *a, u64 len) +{ + if (len == 0) + return 0; + + len--; + len >>= a->blk_shift; + + return fls(len); +} + +static u64 __balloc_max_order_in(struct nvgpu_buddy_allocator *a, + u64 start, u64 end) +{ + u64 size = (end - start) >> a->blk_shift; + + if (size > 0) + return min_t(u64, ilog2(size), a->max_order); + else + return GPU_BALLOC_MAX_ORDER; +} + +/* + * Initialize the buddy lists. + */ +static int balloc_init_lists(struct nvgpu_buddy_allocator *a) +{ + int i; + u64 bstart, bend, order; + struct nvgpu_buddy *buddy; + + bstart = a->start; + bend = a->end; + + /* First make sure the LLs are valid. */ + for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) + INIT_LIST_HEAD(balloc_get_order_list(a, i)); + + while (bstart < bend) { + order = __balloc_max_order_in(a, bstart, bend); + + buddy = balloc_new_buddy(a, NULL, bstart, order); + if (!buddy) + goto cleanup; + + balloc_blist_add(a, buddy); + bstart += balloc_order_to_len(a, order); + } + + return 0; + +cleanup: + for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { + if (!list_empty(balloc_get_order_list(a, i))) { + buddy = list_first_entry(balloc_get_order_list(a, i), + struct nvgpu_buddy, buddy_entry); + balloc_blist_rem(a, buddy); + kmem_cache_free(buddy_cache, buddy); + } + } + + return -ENOMEM; +} + +/* + * Clean up and destroy the passed allocator. + */ +static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *__a) +{ + int i; + struct rb_node *node; + struct nvgpu_buddy *bud; + struct nvgpu_fixed_alloc *falloc; + struct nvgpu_buddy_allocator *a = __a->priv; + + alloc_lock(__a); + + nvgpu_fini_alloc_debug(__a); + + /* + * Free the fixed allocs first. + */ + while ((node = rb_first(&a->fixed_allocs)) != NULL) { + falloc = container_of(node, + struct nvgpu_fixed_alloc, alloced_entry); + + rb_erase(node, &a->fixed_allocs); + __balloc_do_free_fixed(a, falloc); + } + + /* + * And now free all outstanding allocations. + */ + while ((node = rb_first(&a->alloced_buddies)) != NULL) { + bud = container_of(node, struct nvgpu_buddy, alloced_entry); + balloc_free_buddy(a, bud->start); + balloc_blist_add(a, bud); + balloc_coalesce(a, bud); + } + + /* + * Now clean up the unallocated buddies. 
+ */ + for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { + BUG_ON(a->buddy_list_alloced[i] != 0); + + while (!list_empty(balloc_get_order_list(a, i))) { + bud = list_first_entry(balloc_get_order_list(a, i), + struct nvgpu_buddy, buddy_entry); + balloc_blist_rem(a, bud); + kmem_cache_free(buddy_cache, bud); + } + + if (a->buddy_list_len[i] != 0) { + pr_info("Excess buddies!!! (%d: %llu)\n", + i, a->buddy_list_len[i]); + BUG(); + } + if (a->buddy_list_split[i] != 0) { + pr_info("Excess split nodes!!! (%d: %llu)\n", + i, a->buddy_list_split[i]); + BUG(); + } + if (a->buddy_list_alloced[i] != 0) { + pr_info("Excess alloced nodes!!! (%d: %llu)\n", + i, a->buddy_list_alloced[i]); + BUG(); + } + } + + kfree(a); + + alloc_unlock(__a); +} + +/* + * Combine the passed buddy if possible. The pointer in @b may not be valid + * after this as the buddy may be freed. + * + * @a must be locked. + */ +static void balloc_coalesce(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b) +{ + struct nvgpu_buddy *parent; + + if (buddy_is_alloced(b) || buddy_is_split(b)) + return; + + /* + * If both our buddy and I are both not allocated and not split then + * we can coalesce ourselves. + */ + if (!b->buddy) + return; + if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy)) + return; + + parent = b->parent; + + balloc_blist_rem(a, b); + balloc_blist_rem(a, b->buddy); + + buddy_clr_split(parent); + a->buddy_list_split[parent->order]--; + balloc_blist_add(a, parent); + + /* + * Recursively coalesce as far as we can go. + */ + balloc_coalesce(a, parent); + + /* Clean up the remains. */ + kmem_cache_free(buddy_cache, b->buddy); + kmem_cache_free(buddy_cache, b); +} + +/* + * Split a buddy into two new buddies who are 1/2 the size of the parent buddy. + * + * @a must be locked. + */ +static int balloc_split_buddy(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b, int pte_size) +{ + struct nvgpu_buddy *left, *right; + u64 half; + + left = balloc_new_buddy(a, b, b->start, b->order - 1); + if (!left) + return -ENOMEM; + + half = (b->end - b->start) / 2; + + right = balloc_new_buddy(a, b, b->start + half, b->order - 1); + if (!right) { + kmem_cache_free(buddy_cache, left); + return -ENOMEM; + } + + buddy_set_split(b); + a->buddy_list_split[b->order]++; + + b->left = left; + b->right = right; + left->buddy = right; + right->buddy = left; + left->parent = b; + right->parent = b; + + /* PTE considerations. */ + if (a->flags & GPU_ALLOC_GVA_SPACE && + left->order <= a->pte_blk_order) { + left->pte_size = pte_size; + right->pte_size = pte_size; + } + + balloc_blist_rem(a, b); + balloc_blist_add(a, left); + balloc_blist_add(a, right); + + return 0; +} + +/* + * Place the passed buddy into the RB tree for allocated buddies. Never fails + * unless the passed entry is a duplicate which is a bug. + * + * @a must be locked. 
+ */ +static void balloc_alloc_buddy(struct nvgpu_buddy_allocator *a, + struct nvgpu_buddy *b) +{ + struct rb_node **new = &(a->alloced_buddies.rb_node); + struct rb_node *parent = NULL; + + while (*new) { + struct nvgpu_buddy *bud = container_of(*new, struct nvgpu_buddy, + alloced_entry); + + parent = *new; + if (b->start < bud->start) + new = &((*new)->rb_left); + else if (b->start > bud->start) + new = &((*new)->rb_right); + else + BUG_ON("Duplicate entries in allocated list!\n"); + } + + rb_link_node(&b->alloced_entry, parent, new); + rb_insert_color(&b->alloced_entry, &a->alloced_buddies); + + buddy_set_alloced(b); + a->buddy_list_alloced[b->order]++; +} + +/* + * Remove the passed buddy from the allocated buddy RB tree. Returns the + * deallocated buddy for further processing. + * + * @a must be locked. + */ +static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a, + u64 addr) +{ + struct rb_node *node = a->alloced_buddies.rb_node; + struct nvgpu_buddy *bud; + + while (node) { + bud = container_of(node, struct nvgpu_buddy, alloced_entry); + + if (addr < bud->start) + node = node->rb_left; + else if (addr > bud->start) + node = node->rb_right; + else + break; + } + + if (!node) + return NULL; + + rb_erase(node, &a->alloced_buddies); + buddy_clr_alloced(bud); + a->buddy_list_alloced[bud->order]--; + + return bud; +} + +/* + * Find a suitable buddy for the given order and PTE type (big or little). + */ +static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a, + u64 order, int pte_size) +{ + struct nvgpu_buddy *bud; + + if (order > a->max_order || + list_empty(balloc_get_order_list(a, order))) + return NULL; + + if (a->flags & GPU_ALLOC_GVA_SPACE && + pte_size == gmmu_page_size_big) + bud = list_last_entry(balloc_get_order_list(a, order), + struct nvgpu_buddy, buddy_entry); + else + bud = list_first_entry(balloc_get_order_list(a, order), + struct nvgpu_buddy, buddy_entry); + + if (bud->pte_size != BALLOC_PTE_SIZE_ANY && + bud->pte_size != pte_size) + return NULL; + + return bud; +} + +/* + * Allocate a suitably sized buddy. If no suitable buddy exists split higher + * order buddies until we have a suitable buddy to allocate. + * + * For PDE grouping add an extra check to see if a buddy is suitable: that the + * buddy exists in a PDE who's PTE size is reasonable + * + * @a must be locked. + */ +static u64 __balloc_do_alloc(struct nvgpu_buddy_allocator *a, + u64 order, int pte_size) +{ + u64 split_order; + struct nvgpu_buddy *bud = NULL; + + split_order = order; + while (split_order <= a->max_order && + !(bud = __balloc_find_buddy(a, split_order, pte_size))) + split_order++; + + /* Out of memory! */ + if (!bud) + return 0; + + while (bud->order != order) { + if (balloc_split_buddy(a, bud, pte_size)) + return 0; /* No mem... */ + bud = bud->left; + } + + balloc_blist_rem(a, bud); + balloc_alloc_buddy(a, bud); + + return bud->start; +} + +/* + * See if the passed range is actually available for allocation. If so, then + * return 1, otherwise return 0. + * + * TODO: Right now this uses the unoptimal approach of going through all + * outstanding allocations and checking their base/ends. This could be better. + */ +static int balloc_is_range_free(struct nvgpu_buddy_allocator *a, + u64 base, u64 end) +{ + struct rb_node *node; + struct nvgpu_buddy *bud; + + node = rb_first(&a->alloced_buddies); + if (!node) + return 1; /* No allocs yet. 
*/ + + bud = container_of(node, struct nvgpu_buddy, alloced_entry); + + while (bud->start < end) { + if ((bud->start > base && bud->start < end) || + (bud->end > base && bud->end < end)) + return 0; + + node = rb_next(node); + if (!node) + break; + bud = container_of(node, struct nvgpu_buddy, alloced_entry); + } + + return 1; +} + +static void balloc_alloc_fixed(struct nvgpu_buddy_allocator *a, + struct nvgpu_fixed_alloc *f) +{ + struct rb_node **new = &(a->fixed_allocs.rb_node); + struct rb_node *parent = NULL; + + while (*new) { + struct nvgpu_fixed_alloc *falloc = + container_of(*new, struct nvgpu_fixed_alloc, + alloced_entry); + + BUG_ON(!virt_addr_valid(falloc)); + + parent = *new; + if (f->start < falloc->start) + new = &((*new)->rb_left); + else if (f->start > falloc->start) + new = &((*new)->rb_right); + else + BUG_ON("Duplicate entries in allocated list!\n"); + } + + rb_link_node(&f->alloced_entry, parent, new); + rb_insert_color(&f->alloced_entry, &a->fixed_allocs); +} + +/* + * Remove the passed buddy from the allocated buddy RB tree. Returns the + * deallocated buddy for further processing. + * + * @a must be locked. + */ +static struct nvgpu_fixed_alloc *balloc_free_fixed( + struct nvgpu_buddy_allocator *a, u64 addr) +{ + struct rb_node *node = a->fixed_allocs.rb_node; + struct nvgpu_fixed_alloc *falloc; + + while (node) { + falloc = container_of(node, + struct nvgpu_fixed_alloc, alloced_entry); + + if (addr < falloc->start) + node = node->rb_left; + else if (addr > falloc->start) + node = node->rb_right; + else + break; + } + + if (!node) + return NULL; + + rb_erase(node, &a->fixed_allocs); + + return falloc; +} + +/* + * Find the parent range - doesn't necessarily need the parent to actually exist + * as a buddy. Finding an existing parent comes later... + */ +static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a, + u64 base, u64 order, + u64 *pbase, u64 *porder) +{ + u64 base_mask; + u64 shifted_base = balloc_base_shift(a, base); + + order++; + base_mask = ~((a->blk_size << order) - 1); + + shifted_base &= base_mask; + + *pbase = balloc_base_unshift(a, shifted_base); + *porder = order; +} + +/* + * Makes a buddy at the passed address. This will make all parent buddies + * necessary for this buddy to exist as well. + */ +static struct nvgpu_buddy *__balloc_make_fixed_buddy( + struct nvgpu_buddy_allocator *a, u64 base, u64 order) +{ + struct nvgpu_buddy *bud = NULL; + struct list_head *order_list; + u64 cur_order = order, cur_base = base; + + /* + * Algo: + * 1. Keep jumping up a buddy order until we find the real buddy that + * this buddy exists in. + * 2. Then work our way down through the buddy tree until we hit a dead + * end. + * 3. Start splitting buddies until we split to the one we need to + * make. + */ + while (cur_order <= a->max_order) { + int found = 0; + + order_list = balloc_get_order_list(a, cur_order); + list_for_each_entry(bud, order_list, buddy_entry) { + if (bud->start == cur_base) { + found = 1; + break; + } + } + + if (found) + break; + + __balloc_get_parent_range(a, cur_base, cur_order, + &cur_base, &cur_order); + } + + if (cur_order > a->max_order) { + alloc_dbg(balloc_owner(a), "No buddy for range ???\n"); + return NULL; + } + + /* Split this buddy as necessary until we get the target buddy. 
*/ + while (bud->start != base || bud->order != order) { + if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { + balloc_coalesce(a, bud); + return NULL; + } + + if (base < bud->right->start) + bud = bud->left; + else + bud = bud->right; + + } + + return bud; +} + +static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a, + struct nvgpu_fixed_alloc *falloc, + u64 base, u64 len) +{ + u64 shifted_base, inc_base; + u64 align_order; + + shifted_base = balloc_base_shift(a, base); + if (shifted_base == 0) + align_order = __fls(len >> a->blk_shift); + else + align_order = min_t(u64, + __ffs(shifted_base >> a->blk_shift), + __fls(len >> a->blk_shift)); + + if (align_order > a->max_order) { + alloc_dbg(balloc_owner(a), + "Align order too big: %llu > %llu\n", + align_order, a->max_order); + return 0; + } + + /* + * Generate a list of buddies that satisfy this allocation. + */ + inc_base = shifted_base; + while (inc_base < (shifted_base + len)) { + u64 order_len = balloc_order_to_len(a, align_order); + u64 remaining; + struct nvgpu_buddy *bud; + + bud = __balloc_make_fixed_buddy(a, + balloc_base_unshift(a, inc_base), + align_order); + if (!bud) { + alloc_dbg(balloc_owner(a), + "Fixed buddy failed: {0x%llx, %llu}!\n", + balloc_base_unshift(a, inc_base), + align_order); + goto err_and_cleanup; + } + + balloc_blist_rem(a, bud); + balloc_alloc_buddy(a, bud); + __balloc_buddy_list_add(a, bud, &falloc->buddies); + + /* Book keeping. */ + inc_base += order_len; + remaining = (shifted_base + len) - inc_base; + align_order = __ffs(inc_base >> a->blk_shift); + + /* If we don't have much left - trim down align_order. */ + if (balloc_order_to_len(a, align_order) > remaining) + align_order = __balloc_max_order_in(a, inc_base, + inc_base + remaining); + } + + return base; + +err_and_cleanup: + while (!list_empty(&falloc->buddies)) { + struct nvgpu_buddy *bud = list_first_entry(&falloc->buddies, + struct nvgpu_buddy, + buddy_entry); + + __balloc_buddy_list_rem(a, bud); + balloc_free_buddy(a, bud->start); + kmem_cache_free(buddy_cache, bud); + } + + return 0; +} + +static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a, + struct nvgpu_fixed_alloc *falloc) +{ + struct nvgpu_buddy *bud; + + while (!list_empty(&falloc->buddies)) { + bud = list_first_entry(&falloc->buddies, + struct nvgpu_buddy, + buddy_entry); + __balloc_buddy_list_rem(a, bud); + + balloc_free_buddy(a, bud->start); + balloc_blist_add(a, bud); + a->bytes_freed += balloc_order_to_len(a, bud->order); + + /* + * Attemp to defrag the allocation. + */ + balloc_coalesce(a, bud); + } + + kfree(falloc); +} + +/* + * Allocate memory from the passed allocator. + */ +static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len) +{ + u64 order, addr; + int pte_size; + struct nvgpu_buddy_allocator *a = __a->priv; + + nvgpu_alloc_trace_func(); + + alloc_lock(__a); + + order = balloc_get_order(a, len); + + if (order > a->max_order) { + alloc_unlock(__a); + alloc_dbg(balloc_owner(a), "Alloc fail\n"); + nvgpu_alloc_trace_func_done(); + return 0; + } + + /* + * For now pass the base address of the allocator's region to + * __get_pte_size(). This ensures we get the right page size for + * the alloc but we don't have to know what the real address is + * going to be quite yet. + * + * TODO: once userspace supports a unified address space pass 0 for + * the base. This will make only 'len' affect the PTE size. 
+ */ + if (a->flags & GPU_ALLOC_GVA_SPACE) + pte_size = __get_pte_size(a->vm, a->base, len); + else + pte_size = BALLOC_PTE_SIZE_ANY; + + addr = __balloc_do_alloc(a, order, pte_size); + + if (addr) { + a->bytes_alloced += len; + a->bytes_alloced_real += balloc_order_to_len(a, order); + alloc_dbg(balloc_owner(a), + "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", + addr, order, len, + pte_size == gmmu_page_size_big ? "big" : + pte_size == gmmu_page_size_small ? "small" : + "NA/any"); + } else { + alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n"); + } + + a->alloc_made = 1; + + alloc_unlock(__a); + + nvgpu_alloc_trace_func_done(); + return addr; +} + +/* + * Requires @__a to be locked. + */ +static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, + u64 base, u64 len) +{ + u64 ret, real_bytes = 0; + struct nvgpu_buddy *bud; + struct nvgpu_fixed_alloc *falloc = NULL; + struct nvgpu_buddy_allocator *a = __a->priv; + + nvgpu_alloc_trace_func(); + + /* If base isn't aligned to an order 0 block, fail. */ + if (base & (a->blk_size - 1)) + goto fail; + + if (len == 0) + goto fail; + + falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); + if (!falloc) + goto fail; + + INIT_LIST_HEAD(&falloc->buddies); + falloc->start = base; + falloc->end = base + len; + + if (!balloc_is_range_free(a, base, base + len)) { + alloc_dbg(balloc_owner(a), + "Range not free: 0x%llx -> 0x%llx\n", + base, base + len); + goto fail_unlock; + } + + ret = __balloc_do_alloc_fixed(a, falloc, base, len); + if (!ret) { + alloc_dbg(balloc_owner(a), + "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", + base, base + len); + goto fail_unlock; + } + + balloc_alloc_fixed(a, falloc); + + list_for_each_entry(bud, &falloc->buddies, buddy_entry) + real_bytes += (bud->end - bud->start); + + a->bytes_alloced += len; + a->bytes_alloced_real += real_bytes; + + alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base); + + nvgpu_alloc_trace_func_done(); + return base; + +fail_unlock: + alloc_unlock(__a); +fail: + kfree(falloc); + nvgpu_alloc_trace_func_done(); + return 0; +} + +/* + * Allocate a fixed address allocation. The address of the allocation is @base + * and the length is @len. This is not a typical buddy allocator operation and + * as such has a high posibility of failure if the address space is heavily in + * use. + * + * Please do not use this function unless _absolutely_ necessary. + */ +static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a, + u64 base, u64 len) +{ + u64 alloc; + struct nvgpu_buddy_allocator *a = __a->priv; + + alloc_lock(__a); + alloc = __nvgpu_balloc_fixed_buddy(__a, base, len); + a->alloc_made = 1; + alloc_unlock(__a); + + return alloc; +} + +/* + * Free the passed allocation. + */ +static void nvgpu_buddy_bfree(struct nvgpu_allocator *__a, u64 addr) +{ + struct nvgpu_buddy *bud; + struct nvgpu_fixed_alloc *falloc; + struct nvgpu_buddy_allocator *a = __a->priv; + + nvgpu_alloc_trace_func(); + + if (!addr) { + nvgpu_alloc_trace_func_done(); + return; + } + + alloc_lock(__a); + + /* + * First see if this is a fixed alloc. If not fall back to a regular + * buddy. + */ + falloc = balloc_free_fixed(a, addr); + if (falloc) { + __balloc_do_free_fixed(a, falloc); + goto done; + } + + bud = balloc_free_buddy(a, addr); + if (!bud) + goto done; + + balloc_blist_add(a, bud); + a->bytes_freed += balloc_order_to_len(a, bud->order); + + /* + * Attemp to defrag the allocation. 
+ */ + balloc_coalesce(a, bud); + +done: + alloc_unlock(__a); + alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr); + nvgpu_alloc_trace_func_done(); + return; +} + +static bool nvgpu_buddy_reserve_is_possible(struct nvgpu_buddy_allocator *a, + struct nvgpu_alloc_carveout *co) +{ + struct nvgpu_alloc_carveout *tmp; + u64 co_base, co_end; + + co_base = co->base; + co_end = co->base + co->length; + + /* + * Not the fastest approach but we should not have that many carveouts + * for any reasonable allocator. + */ + list_for_each_entry(tmp, &a->co_list, co_entry) { + if ((co_base >= tmp->base && + co_base < (tmp->base + tmp->length)) || + (co_end >= tmp->base && + co_end < (tmp->base + tmp->length))) + return false; + } + + return true; +} + +/* + * Carveouts can only be reserved before any regular allocations have been + * made. + */ +static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *__a, + struct nvgpu_alloc_carveout *co) +{ + struct nvgpu_buddy_allocator *a = __a->priv; + u64 addr; + int err = 0; + + if (co->base < a->start || (co->base + co->length) > a->end || + a->alloc_made) + return -EINVAL; + + alloc_lock(__a); + + if (!nvgpu_buddy_reserve_is_possible(a, co)) { + err = -EBUSY; + goto done; + } + + /* Should not be possible to fail... */ + addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length); + if (!addr) { + err = -ENOMEM; + pr_warn("%s: Failed to reserve a valid carveout!\n", __func__); + goto done; + } + + list_add(&co->co_entry, &a->co_list); + +done: + alloc_unlock(__a); + return err; +} + +/* + * Carveouts can be release at any time. + */ +static void nvgpu_buddy_release_co(struct nvgpu_allocator *__a, + struct nvgpu_alloc_carveout *co) +{ + alloc_lock(__a); + + list_del_init(&co->co_entry); + nvgpu_free(__a, co->base); + + alloc_unlock(__a); +} + +static u64 nvgpu_buddy_alloc_length(struct nvgpu_allocator *a) +{ + struct nvgpu_buddy_allocator *ba = a->priv; + + return ba->length; +} + +static u64 nvgpu_buddy_alloc_base(struct nvgpu_allocator *a) +{ + struct nvgpu_buddy_allocator *ba = a->priv; + + return ba->start; +} + +static int nvgpu_buddy_alloc_inited(struct nvgpu_allocator *a) +{ + struct nvgpu_buddy_allocator *ba = a->priv; + int inited = ba->initialized; + + rmb(); + return inited; +} + +static u64 nvgpu_buddy_alloc_end(struct nvgpu_allocator *a) +{ + struct nvgpu_buddy_allocator *ba = a->priv; + + return ba->end; +} + +static u64 nvgpu_buddy_alloc_space(struct nvgpu_allocator *a) +{ + struct nvgpu_buddy_allocator *ba = a->priv; + u64 space; + + alloc_lock(a); + space = ba->end - ba->start - + (ba->bytes_alloced_real - ba->bytes_freed); + alloc_unlock(a); + + return space; +} + +/* + * Print the buddy allocator top level stats. If you pass @s as NULL then the + * stats are printed to the kernel log. This lets this code be used for + * debugging purposes internal to the allocator. 
+ */ +static void nvgpu_buddy_print_stats(struct nvgpu_allocator *__a, + struct seq_file *s, int lock) +{ + int i = 0; + struct rb_node *node; + struct nvgpu_fixed_alloc *falloc; + struct nvgpu_alloc_carveout *tmp; + struct nvgpu_buddy_allocator *a = __a->priv; + + __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n", + a->base, a->length, a->blk_size); + __alloc_pstat(s, __a, "Internal params:\n"); + __alloc_pstat(s, __a, " start = 0x%llx\n", a->start); + __alloc_pstat(s, __a, " end = 0x%llx\n", a->end); + __alloc_pstat(s, __a, " count = 0x%llx\n", a->count); + __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks); + __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order); + + if (lock) + alloc_lock(__a); + + if (!list_empty(&a->co_list)) { + __alloc_pstat(s, __a, "\n"); + __alloc_pstat(s, __a, "Carveouts:\n"); + list_for_each_entry(tmp, &a->co_list, co_entry) + __alloc_pstat(s, __a, + " CO %2d: %-20s 0x%010llx + 0x%llx\n", + i++, tmp->name, tmp->base, tmp->length); + } + + __alloc_pstat(s, __a, "\n"); + __alloc_pstat(s, __a, "Buddy blocks:\n"); + __alloc_pstat(s, __a, " Order Free Alloced Split\n"); + __alloc_pstat(s, __a, " ----- ---- ------- -----\n"); + + for (i = a->max_order; i >= 0; i--) { + if (a->buddy_list_len[i] == 0 && + a->buddy_list_alloced[i] == 0 && + a->buddy_list_split[i] == 0) + continue; + + __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i, + a->buddy_list_len[i], + a->buddy_list_alloced[i], + a->buddy_list_split[i]); + } + + __alloc_pstat(s, __a, "\n"); + + for (node = rb_first(&a->fixed_allocs), i = 1; + node != NULL; + node = rb_next(node)) { + falloc = container_of(node, + struct nvgpu_fixed_alloc, alloced_entry); + + __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", + i, falloc->start, falloc->end); + } + + __alloc_pstat(s, __a, "\n"); + __alloc_pstat(s, __a, "Bytes allocated: %llu\n", + a->bytes_alloced); + __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n", + a->bytes_alloced_real); + __alloc_pstat(s, __a, "Bytes freed: %llu\n", + a->bytes_freed); + + if (lock) + alloc_unlock(__a); +} + +static const struct nvgpu_allocator_ops buddy_ops = { + .alloc = nvgpu_buddy_balloc, + .free = nvgpu_buddy_bfree, + + .alloc_fixed = nvgpu_balloc_fixed_buddy, + /* .free_fixed not needed. */ + + .reserve_carveout = nvgpu_buddy_reserve_co, + .release_carveout = nvgpu_buddy_release_co, + + .base = nvgpu_buddy_alloc_base, + .length = nvgpu_buddy_alloc_length, + .end = nvgpu_buddy_alloc_end, + .inited = nvgpu_buddy_alloc_inited, + .space = nvgpu_buddy_alloc_space, + + .fini = nvgpu_buddy_allocator_destroy, + + .print_stats = nvgpu_buddy_print_stats, +}; + +/* + * Initialize a buddy allocator. Returns 0 on success. This allocator does + * not necessarily manage bytes. It manages distinct ranges of resources. This + * allows the allocator to work for things like comp_tags, semaphores, etc. + * + * @allocator: Ptr to an allocator struct to init. + * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to + * get PTE size for GVA spaces. + * @name: Name of the allocator. Doesn't have to be static storage. + * @base: The base address of the resource pool being managed. + * @size: Number of resources in the pool. + * @blk_size: Minimum number of resources to allocate at once. For things like + * semaphores this is 1. For GVA this might be as much as 64k. This + * corresponds to order 0. Must be power of 2. + * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator + * will try and pick a reasonable max order. 
+ * @flags: Extra flags necessary. See GPU_BALLOC_*. + */ +int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, + struct vm_gk20a *vm, const char *name, + u64 base, u64 size, u64 blk_size, + u64 max_order, u64 flags) +{ + int err; + u64 pde_size; + struct nvgpu_buddy_allocator *a; + + /* blk_size must be greater than 0 and a power of 2. */ + if (blk_size == 0) + return -EINVAL; + if (blk_size & (blk_size - 1)) + return -EINVAL; + + if (max_order > GPU_BALLOC_MAX_ORDER) + return -EINVAL; + + /* If this is to manage a GVA space we need a VM. */ + if (flags & GPU_ALLOC_GVA_SPACE && !vm) + return -EINVAL; + + a = kzalloc(sizeof(struct nvgpu_buddy_allocator), GFP_KERNEL); + if (!a) + return -ENOMEM; + + err = __nvgpu_alloc_common_init(__a, name, a, false, &buddy_ops); + if (err) + goto fail; + + a->base = base; + a->length = size; + a->blk_size = blk_size; + a->blk_shift = __ffs(blk_size); + a->owner = __a; + + /* + * If base is 0 then modfy base to be the size of one block so that we + * can return errors by returning addr == 0. + */ + if (a->base == 0) { + a->base = a->blk_size; + a->length -= a->blk_size; + } + + a->vm = vm; + if (flags & GPU_ALLOC_GVA_SPACE) { + pde_size = ((u64)vm->big_page_size) << 10; + a->pte_blk_order = balloc_get_order(a, pde_size); + } + + /* + * When we have a GVA space with big_pages enabled the size and base + * must be PDE aligned. If big_pages are not enabled then this + * requirement is not necessary. + */ + if (flags & GPU_ALLOC_GVA_SPACE && vm->big_pages && + (base & ((vm->big_page_size << 10) - 1) || + size & ((vm->big_page_size << 10) - 1))) + return -EINVAL; + + a->flags = flags; + a->max_order = max_order; + + balloc_allocator_align(a); + balloc_compute_max_order(a); + + /* Shared buddy kmem_cache for all allocators. */ + if (!buddy_cache) + buddy_cache = KMEM_CACHE(nvgpu_buddy, 0); + if (!buddy_cache) { + err = -ENOMEM; + goto fail; + } + + a->alloced_buddies = RB_ROOT; + a->fixed_allocs = RB_ROOT; + INIT_LIST_HEAD(&a->co_list); + err = balloc_init_lists(a); + if (err) + goto fail; + + wmb(); + a->initialized = 1; + + nvgpu_init_alloc_debug(g, __a); + alloc_dbg(__a, "New allocator: type buddy\n"); + alloc_dbg(__a, " base 0x%llx\n", a->base); + alloc_dbg(__a, " size 0x%llx\n", a->length); + alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); + alloc_dbg(__a, " max_order %llu\n", a->max_order); + alloc_dbg(__a, " flags 0x%llx\n", a->flags); + + return 0; + +fail: + kfree(a); + return err; +} + +int nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *a, + const char *name, u64 base, u64 size, + u64 blk_size, u64 flags) +{ + return __nvgpu_buddy_allocator_init(g, a, NULL, name, + base, size, blk_size, 0, 0); +} diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h new file mode 100644 index 00000000..50a11f14 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef BUDDY_ALLOCATOR_PRIV_H +#define BUDDY_ALLOCATOR_PRIV_H + +#include +#include + +struct nvgpu_allocator; +struct vm_gk20a; + +/* + * Each buddy is an element in a binary tree. + */ +struct nvgpu_buddy { + struct nvgpu_buddy *parent; /* Parent node. */ + struct nvgpu_buddy *buddy; /* This node's buddy. */ + struct nvgpu_buddy *left; /* Lower address sub-node. */ + struct nvgpu_buddy *right; /* Higher address sub-node. */ + + struct list_head buddy_entry; /* List entry for various lists. */ + struct rb_node alloced_entry; /* RB tree of allocations. */ + + u64 start; /* Start address of this buddy. */ + u64 end; /* End address of this buddy. */ + u64 order; /* Buddy order. */ + +#define BALLOC_BUDDY_ALLOCED 0x1 +#define BALLOC_BUDDY_SPLIT 0x2 +#define BALLOC_BUDDY_IN_LIST 0x4 + int flags; /* List of associated flags. */ + + /* + * Size of the PDE this buddy is using. This allows for grouping like + * sized allocations into the same PDE. This uses the gmmu_pgsz_gk20a + * enum except for the BALLOC_PTE_SIZE_ANY specifier. + */ +#define BALLOC_PTE_SIZE_ANY -1 + int pte_size; +}; + +#define __buddy_flag_ops(flag, flag_up) \ + static inline int buddy_is_ ## flag(struct nvgpu_buddy *b) \ + { \ + return b->flags & BALLOC_BUDDY_ ## flag_up; \ + } \ + static inline void buddy_set_ ## flag(struct nvgpu_buddy *b) \ + { \ + b->flags |= BALLOC_BUDDY_ ## flag_up; \ + } \ + static inline void buddy_clr_ ## flag(struct nvgpu_buddy *b) \ + { \ + b->flags &= ~BALLOC_BUDDY_ ## flag_up; \ + } + +/* + * int buddy_is_alloced(struct nvgpu_buddy *b); + * void buddy_set_alloced(struct nvgpu_buddy *b); + * void buddy_clr_alloced(struct nvgpu_buddy *b); + * + * int buddy_is_split(struct nvgpu_buddy *b); + * void buddy_set_split(struct nvgpu_buddy *b); + * void buddy_clr_split(struct nvgpu_buddy *b); + * + * int buddy_is_in_list(struct nvgpu_buddy *b); + * void buddy_set_in_list(struct nvgpu_buddy *b); + * void buddy_clr_in_list(struct nvgpu_buddy *b); + */ +__buddy_flag_ops(alloced, ALLOCED); +__buddy_flag_ops(split, SPLIT); +__buddy_flag_ops(in_list, IN_LIST); + +/* + * Keeps info for a fixed allocation. + */ +struct nvgpu_fixed_alloc { + struct list_head buddies; /* List of buddies. */ + struct rb_node alloced_entry; /* RB tree of fixed allocations. */ + + u64 start; /* Start of fixed block. */ + u64 end; /* End address. */ +}; + +/* + * GPU buddy allocator for the various GPU address spaces. Each addressable unit + * doesn't have to correspond to a byte. In some cases each unit is a more + * complex object such as a comp_tag line or the like. + * + * The max order is computed based on the size of the minimum order and the size + * of the address space. + * + * order_size is the size of an order 0 buddy. + */ +struct nvgpu_buddy_allocator { + struct nvgpu_allocator *owner; /* Owner of this buddy allocator. */ + struct vm_gk20a *vm; /* Parent VM - can be NULL. */ + + u64 base; /* Base address of the space. */ + u64 length; /* Length of the space. */ + u64 blk_size; /* Size of order 0 allocation. */ + u64 blk_shift; /* Shift to divide by blk_size. */ + + /* Internal stuff. */ + u64 start; /* Real start (aligned to blk_size). */ + u64 end; /* Real end, trimmed if needed. */ + u64 count; /* Count of objects in space. */ + u64 blks; /* Count of blks in the space. */ + u64 max_order; /* Specific maximum order. */ + + struct rb_root alloced_buddies; /* Outstanding allocations. 
*/ + struct rb_root fixed_allocs; /* Outstanding fixed allocations. */ + + struct list_head co_list; + + /* + * Impose an upper bound on the maximum order. + */ +#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1) + + struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN]; + u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN]; + u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN]; + u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN]; + + /* + * This is for when the allocator is managing a GVA space (the + * GPU_ALLOC_GVA_SPACE bit is set in @flags). This requires + * that we group like sized allocations into PDE blocks. + */ + u64 pte_blk_order; + + int initialized; + int alloc_made; /* True after the first alloc. */ + + u64 flags; + + u64 bytes_alloced; + u64 bytes_alloced_real; + u64 bytes_freed; +}; + +static inline struct nvgpu_buddy_allocator *buddy_allocator( + struct nvgpu_allocator *a) +{ + return (struct nvgpu_buddy_allocator *)(a)->priv; +} + +static inline struct list_head *balloc_get_order_list( + struct nvgpu_buddy_allocator *a, int order) +{ + return &a->buddy_list[order]; +} + +static inline u64 balloc_order_to_len(struct nvgpu_buddy_allocator *a, + int order) +{ + return (1 << order) * a->blk_size; +} + +static inline u64 balloc_base_shift(struct nvgpu_buddy_allocator *a, + u64 base) +{ + return base - a->start; +} + +static inline u64 balloc_base_unshift(struct nvgpu_buddy_allocator *a, + u64 base) +{ + return base + a->start; +} + +static inline struct nvgpu_allocator *balloc_owner( + struct nvgpu_buddy_allocator *a) +{ + return a->owner; +} + +#endif diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c new file mode 100644 index 00000000..e3063a42 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include + +#include + +#include "lockless_allocator_priv.h" + +static u64 nvgpu_lockless_alloc_length(struct nvgpu_allocator *a) +{ + struct nvgpu_lockless_allocator *pa = a->priv; + + return pa->length; +} + +static u64 nvgpu_lockless_alloc_base(struct nvgpu_allocator *a) +{ + struct nvgpu_lockless_allocator *pa = a->priv; + + return pa->base; +} + +static int nvgpu_lockless_alloc_inited(struct nvgpu_allocator *a) +{ + struct nvgpu_lockless_allocator *pa = a->priv; + int inited = pa->inited; + + rmb(); + return inited; +} + +static u64 nvgpu_lockless_alloc_end(struct nvgpu_allocator *a) +{ + struct nvgpu_lockless_allocator *pa = a->priv; + + return pa->base + pa->length; +} + +static u64 nvgpu_lockless_alloc(struct nvgpu_allocator *a, u64 len) +{ + struct nvgpu_lockless_allocator *pa = a->priv; + int head, new_head, ret; + u64 addr = 0; + + if (len != pa->blk_size) + return 0; + + head = ACCESS_ONCE(pa->head); + while (head >= 0) { + new_head = ACCESS_ONCE(pa->next[head]); + ret = cmpxchg(&pa->head, head, new_head); + if (ret == head) { + addr = pa->base + head * pa->blk_size; + atomic_inc(&pa->nr_allocs); + alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head, + addr); + break; + } + head = ACCESS_ONCE(pa->head); + } + return addr; +} + +static void nvgpu_lockless_free(struct nvgpu_allocator *a, u64 addr) +{ + struct nvgpu_lockless_allocator *pa = a->priv; + int head, ret; + u64 cur_idx, rem; + + cur_idx = addr - pa->base; + rem = do_div(cur_idx, pa->blk_size); + + while (1) { + head = ACCESS_ONCE(pa->head); + ACCESS_ONCE(pa->next[cur_idx]) = head; + ret = cmpxchg(&pa->head, head, cur_idx); + if (ret == head) { + atomic_dec(&pa->nr_allocs); + alloc_dbg(a, "Free node # %llu\n", cur_idx); + break; + } + } +} + +static void nvgpu_lockless_alloc_destroy(struct nvgpu_allocator *a) +{ + struct nvgpu_lockless_allocator *pa = a->priv; + + nvgpu_fini_alloc_debug(a); + + vfree(pa->next); + kfree(pa); +} + +static void nvgpu_lockless_print_stats(struct nvgpu_allocator *a, + struct seq_file *s, int lock) +{ + struct nvgpu_lockless_allocator *pa = a->priv; + + __alloc_pstat(s, a, "Lockless allocator params:\n"); + __alloc_pstat(s, a, " start = 0x%llx\n", pa->base); + __alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length); + + /* Actual stats. */ + __alloc_pstat(s, a, "Stats:\n"); + __alloc_pstat(s, a, " Number allocs = %d\n", + atomic_read(&pa->nr_allocs)); + __alloc_pstat(s, a, " Number free = %d\n", + pa->nr_nodes - atomic_read(&pa->nr_allocs)); +} + +static const struct nvgpu_allocator_ops pool_ops = { + .alloc = nvgpu_lockless_alloc, + .free = nvgpu_lockless_free, + + .base = nvgpu_lockless_alloc_base, + .length = nvgpu_lockless_alloc_length, + .end = nvgpu_lockless_alloc_end, + .inited = nvgpu_lockless_alloc_inited, + + .fini = nvgpu_lockless_alloc_destroy, + + .print_stats = nvgpu_lockless_print_stats, +}; + +int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, + const char *name, u64 base, u64 length, + u64 blk_size, u64 flags) +{ + int i; + int err; + int nr_nodes; + u64 count, rem; + struct nvgpu_lockless_allocator *a; + + if (!blk_size) + return -EINVAL; + + /* + * Ensure we have space for atleast one node & there's no overflow. 
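+ * As an illustration (hypothetical sizes): length = 1 MB with a
+ * blk_size of 64 bytes gives count = 16384 nodes, which costs
+ * 16384 * sizeof(int) = 64 KB for the next[] array allocated below.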
+ * In order to control memory footprint, we require count < INT_MAX + */ + count = length; + rem = do_div(count, blk_size); + if (!base || !count || count > INT_MAX) + return -EINVAL; + + a = kzalloc(sizeof(struct nvgpu_lockless_allocator), GFP_KERNEL); + if (!a) + return -ENOMEM; + + err = __nvgpu_alloc_common_init(__a, name, a, false, &pool_ops); + if (err) + goto fail; + + a->next = vzalloc(sizeof(*a->next) * count); + if (!a->next) { + err = -ENOMEM; + goto fail; + } + + /* chain the elements together to form the initial free list */ + nr_nodes = (int)count; + for (i = 0; i < nr_nodes; i++) + a->next[i] = i + 1; + a->next[nr_nodes - 1] = -1; + + a->base = base; + a->length = length; + a->blk_size = blk_size; + a->nr_nodes = nr_nodes; + a->flags = flags; + atomic_set(&a->nr_allocs, 0); + + wmb(); + a->inited = true; + + nvgpu_init_alloc_debug(g, __a); + alloc_dbg(__a, "New allocator: type lockless\n"); + alloc_dbg(__a, " base 0x%llx\n", a->base); + alloc_dbg(__a, " nodes %d\n", a->nr_nodes); + alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); + alloc_dbg(__a, " flags 0x%llx\n", a->flags); + + return 0; + +fail: + kfree(a); + return err; +} diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h new file mode 100644 index 00000000..32421ac1 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* + * Basics: + * + * - Lockless memory allocator for fixed-size structures, whose + * size is defined up front at init time. + * - Memory footprint scales linearly w/ the number of structures in + * the pool. It is ~= sizeof(int) * N. + * - Memory is pre-allocated by the client. The allocator itself + * only computes the addresses for allocations. + * - Limit of MAX_INT nodes that the allocator can be responsible for. + * + * Implementation details: + * + * The allocator maintains a single list of free nodes. We allocate & + * free nodes from the head of the list. We rely on the cmpxchg() operator + * to maintain atomicity on the head. + * + * So, both allocs & frees are O(1)!! + * + * -- Definitions -- + * Block Size - size of a single structure that this allocator will + * allocate. + * Node - one of the elements of size blk_size in the + * client-allocated buffer. + * Node Index - zero-based index of a node in the client-allocated + * contiguous buffer. + * + * -- Initial State -- + * We maintain the following to track the state of the free list: + * + * 1) A "head" index to track the index of the first free node in the list + * 2) A "next" array to track the index of the next free node in the list + * for every node. So next[head], will give the index to the 2nd free + * element in the list. 
+ * + * So, to begin with, the free list consists of all node indices, and each + * position in the next array contains index N + 1: + * + * head = 0 + * next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes + * free_list = 0->1->2->3->4->-1 + * + * -- Allocations -- + * 1) Read the current head (aka acq_head) + * 2) Read next[acq_head], to get the 2nd free element (aka new_head) + * 3) cmp_xchg(&head, acq_head, new_head) + * 4) If it succeeds, compute the address of the node, based on + * base address, blk_size, & acq_head. + * + * head = 1; + * next = [1, 2, 3, 4, -1] : Example after allocating Node #0 + * free_list = 1->2->3->4->-1 + * + * head = 2; + * next = [1, 2, 3, 4, -1] : Example after allocating Node #1 + * free_list = 2->3->4->-1 + * + * -- Frees -- + * 1) Based on the address to be freed, calculate the index of the node + * being freed (cur_idx) + * 2) Read the current head (old_head) + * 3) So the freed node is going to go at the head of the list, and we + * want to put the old_head after it. So next[cur_idx] = old_head + * 4) cmpxchg(head, old_head, cur_idx) + * + * head = 0 + * next = [2, 2, 3, 4, -1] + * free_list = 0->2->3->4->-1 : Example after freeing Node #0 + * + * head = 1 + * next = [2, 0, 3, 4, -1] + * free_list = 1->0->2->3->4->-1 : Example after freeing Node #1 + */ + +#ifndef LOCKLESS_ALLOCATOR_PRIV_H +#define LOCKLESS_ALLOCATOR_PRIV_H + +struct nvgpu_allocator; + +struct nvgpu_lockless_allocator { + struct nvgpu_allocator *owner; + + u64 base; /* Base address of the space. */ + u64 length; /* Length of the space. */ + u64 blk_size; /* Size of the structure being allocated */ + int nr_nodes; /* Number of nodes available for allocation */ + + int *next; /* An array holding the next indices per node */ + int head; /* Current node at the top of the stack */ + + u64 flags; + + bool inited; + + /* Statistics */ + atomic_t nr_allocs; +}; + +static inline struct nvgpu_lockless_allocator *lockless_allocator( + struct nvgpu_allocator *a) +{ + return (struct nvgpu_lockless_allocator *)(a)->priv; +} + +#endif diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c new file mode 100644 index 00000000..ebd779c0 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c @@ -0,0 +1,212 @@ +/* + * gk20a allocator + * + * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include + +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" +#include "gk20a/platform_gk20a.h" + +u32 nvgpu_alloc_tracing_on; + +u64 nvgpu_alloc_length(struct nvgpu_allocator *a) +{ + if (a->ops->length) + return a->ops->length(a); + + return 0; +} + +u64 nvgpu_alloc_base(struct nvgpu_allocator *a) +{ + if (a->ops->base) + return a->ops->base(a); + + return 0; +} + +u64 nvgpu_alloc_initialized(struct nvgpu_allocator *a) +{ + if (!a->ops || !a->ops->inited) + return 0; + + return a->ops->inited(a); +} + +u64 nvgpu_alloc_end(struct nvgpu_allocator *a) +{ + if (a->ops->end) + return a->ops->end(a); + + return 0; +} + +u64 nvgpu_alloc_space(struct nvgpu_allocator *a) +{ + if (a->ops->space) + return a->ops->space(a); + + return 0; +} + +u64 nvgpu_alloc(struct nvgpu_allocator *a, u64 len) +{ + return a->ops->alloc(a, len); +} + +void nvgpu_free(struct nvgpu_allocator *a, u64 addr) +{ + a->ops->free(a, addr); +} + +u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len) +{ + if (a->ops->alloc_fixed) + return a->ops->alloc_fixed(a, base, len); + + return 0; +} + +void nvgpu_free_fixed(struct nvgpu_allocator *a, u64 base, u64 len) +{ + /* + * If this operation is not defined for the allocator then just do + * nothing. The alternative would be to fall back on the regular + * free but that may be harmful in unexpected ways. + */ + if (a->ops->free_fixed) + a->ops->free_fixed(a, base, len); +} + +int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a, + struct nvgpu_alloc_carveout *co) +{ + if (a->ops->reserve_carveout) + return a->ops->reserve_carveout(a, co); + + return -ENODEV; +} + +void nvgpu_alloc_release_carveout(struct nvgpu_allocator *a, + struct nvgpu_alloc_carveout *co) +{ + if (a->ops->release_carveout) + a->ops->release_carveout(a, co); +} + +void nvgpu_alloc_destroy(struct nvgpu_allocator *a) +{ + a->ops->fini(a); + memset(a, 0, sizeof(*a)); +} + +/* + * Handle the common init stuff for a nvgpu_allocator. + */ +int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, + const char *name, void *priv, bool dbg, + const struct nvgpu_allocator_ops *ops) +{ + if (!ops) + return -EINVAL; + + /* + * This is the bare minimum operations required for a sensible + * allocator. 
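+ * Only alloc(), free() and fini() are mandatory. The query, fixed-alloc
+ * and carveout hooks are optional; the wrappers above substitute a benign
+ * default (return 0, do nothing, or -ENODEV) when they are NULL.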
+ */ + if (!ops->alloc || !ops->free || !ops->fini) + return -EINVAL; + + a->ops = ops; + a->priv = priv; + a->debug = dbg; + + mutex_init(&a->lock); + + strlcpy(a->name, name, sizeof(a->name)); + + return 0; +} + +void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a, + struct seq_file *s, int lock) +{ + __a->ops->print_stats(__a, s, lock); +} + +#ifdef CONFIG_DEBUG_FS +static int __alloc_show(struct seq_file *s, void *unused) +{ + struct nvgpu_allocator *a = s->private; + + nvgpu_alloc_print_stats(a, s, 1); + + return 0; +} + +static int __alloc_open(struct inode *inode, struct file *file) +{ + return single_open(file, __alloc_show, inode->i_private); +} + +static const struct file_operations __alloc_fops = { + .open = __alloc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + +void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) +{ +#ifdef CONFIG_DEBUG_FS + if (!g->debugfs_allocators) + return; + + a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, + g->debugfs_allocators, + a, &__alloc_fops); +#endif +} + +void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) +{ +#ifdef CONFIG_DEBUG_FS + if (!IS_ERR_OR_NULL(a->debugfs_entry)) + debugfs_remove(a->debugfs_entry); +#endif +} + +void nvgpu_alloc_debugfs_init(struct device *dev) +{ +#ifdef CONFIG_DEBUG_FS + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct dentry *gpu_root = platform->debugfs; + struct gk20a *g = get_gk20a(dev); + + g->debugfs_allocators = debugfs_create_dir("allocators", gpu_root); + if (IS_ERR_OR_NULL(g->debugfs_allocators)) + return; + + debugfs_create_u32("tracing", 0664, g->debugfs_allocators, + &nvgpu_alloc_tracing_on); +#endif +} diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c new file mode 100644 index 00000000..c61b2238 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c @@ -0,0 +1,937 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include +#include + +#include "buddy_allocator_priv.h" + +#define palloc_dbg(a, fmt, arg...) \ + alloc_dbg(palloc_owner(a), fmt, ##arg) + +static struct kmem_cache *page_alloc_cache; +static struct kmem_cache *page_alloc_chunk_cache; +static struct kmem_cache *page_alloc_slab_page_cache; +static DEFINE_MUTEX(meta_data_cache_lock); + +/* + * Handle the book-keeping for these operations. 
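+ * A slab page migrates between its slab's empty, partial and full lists as
+ * objects are allocated and freed; the nr_empty/nr_partial/nr_full counters
+ * simply mirror the list lengths for the stats code further down.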
+ */ +static inline void add_slab_page_to_empty(struct page_alloc_slab *slab, + struct page_alloc_slab_page *page) +{ + BUG_ON(page->state != SP_NONE); + list_add(&page->list_entry, &slab->empty); + slab->nr_empty++; + page->state = SP_EMPTY; +} +static inline void add_slab_page_to_partial(struct page_alloc_slab *slab, + struct page_alloc_slab_page *page) +{ + BUG_ON(page->state != SP_NONE); + list_add(&page->list_entry, &slab->partial); + slab->nr_partial++; + page->state = SP_PARTIAL; +} +static inline void add_slab_page_to_full(struct page_alloc_slab *slab, + struct page_alloc_slab_page *page) +{ + BUG_ON(page->state != SP_NONE); + list_add(&page->list_entry, &slab->full); + slab->nr_full++; + page->state = SP_FULL; +} + +static inline void del_slab_page_from_empty(struct page_alloc_slab *slab, + struct page_alloc_slab_page *page) +{ + list_del_init(&page->list_entry); + slab->nr_empty--; + page->state = SP_NONE; +} +static inline void del_slab_page_from_partial(struct page_alloc_slab *slab, + struct page_alloc_slab_page *page) +{ + list_del_init(&page->list_entry); + slab->nr_partial--; + page->state = SP_NONE; +} +static inline void del_slab_page_from_full(struct page_alloc_slab *slab, + struct page_alloc_slab_page *page) +{ + list_del_init(&page->list_entry); + slab->nr_full--; + page->state = SP_NONE; +} + +static u64 nvgpu_page_alloc_length(struct nvgpu_allocator *a) +{ + struct nvgpu_page_allocator *va = a->priv; + + return nvgpu_alloc_length(&va->source_allocator); +} + +static u64 nvgpu_page_alloc_base(struct nvgpu_allocator *a) +{ + struct nvgpu_page_allocator *va = a->priv; + + return nvgpu_alloc_base(&va->source_allocator); +} + +static int nvgpu_page_alloc_inited(struct nvgpu_allocator *a) +{ + struct nvgpu_page_allocator *va = a->priv; + + return nvgpu_alloc_initialized(&va->source_allocator); +} + +static u64 nvgpu_page_alloc_end(struct nvgpu_allocator *a) +{ + struct nvgpu_page_allocator *va = a->priv; + + return nvgpu_alloc_end(&va->source_allocator); +} + +static u64 nvgpu_page_alloc_space(struct nvgpu_allocator *a) +{ + struct nvgpu_page_allocator *va = a->priv; + + return nvgpu_alloc_space(&va->source_allocator); +} + +static int nvgpu_page_reserve_co(struct nvgpu_allocator *a, + struct nvgpu_alloc_carveout *co) +{ + struct nvgpu_page_allocator *va = a->priv; + + return nvgpu_alloc_reserve_carveout(&va->source_allocator, co); +} + +static void nvgpu_page_release_co(struct nvgpu_allocator *a, + struct nvgpu_alloc_carveout *co) +{ + struct nvgpu_page_allocator *va = a->priv; + + nvgpu_alloc_release_carveout(&va->source_allocator, co); +} + +static void __nvgpu_free_pages(struct nvgpu_page_allocator *a, + struct nvgpu_page_alloc *alloc, + bool free_buddy_alloc) +{ + struct page_alloc_chunk *chunk; + + while (!list_empty(&alloc->alloc_chunks)) { + chunk = list_first_entry(&alloc->alloc_chunks, + struct page_alloc_chunk, + list_entry); + list_del(&chunk->list_entry); + + if (free_buddy_alloc) + nvgpu_free(&a->source_allocator, chunk->base); + kfree(chunk); + } + + kfree(alloc); +} + +static int __insert_page_alloc(struct nvgpu_page_allocator *a, + struct nvgpu_page_alloc *alloc) +{ + struct rb_node **new = &a->allocs.rb_node; + struct rb_node *parent = NULL; + + while (*new) { + struct nvgpu_page_alloc *tmp = + container_of(*new, struct nvgpu_page_alloc, + tree_entry); + + parent = *new; + if (alloc->base < tmp->base) { + new = &((*new)->rb_left); + } else if (alloc->base > tmp->base) { + new = &((*new)->rb_right); + } else { + WARN(1, "Duplicate entries in allocated 
list!\n"); + return 0; + } + } + + rb_link_node(&alloc->tree_entry, parent, new); + rb_insert_color(&alloc->tree_entry, &a->allocs); + + return 0; +} + +static struct nvgpu_page_alloc *__find_page_alloc( + struct nvgpu_page_allocator *a, + u64 addr) +{ + struct rb_node *node = a->allocs.rb_node; + struct nvgpu_page_alloc *alloc; + + while (node) { + alloc = container_of(node, struct nvgpu_page_alloc, tree_entry); + + if (addr < alloc->base) + node = node->rb_left; + else if (addr > alloc->base) + node = node->rb_right; + else + break; + } + + if (!node) + return NULL; + + rb_erase(node, &a->allocs); + + return alloc; +} + +static struct page_alloc_slab_page *alloc_slab_page( + struct nvgpu_page_allocator *a, + struct page_alloc_slab *slab) +{ + struct page_alloc_slab_page *slab_page; + + slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL); + if (!slab_page) { + palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n"); + return ERR_PTR(-ENOMEM); + } + + memset(slab_page, 0, sizeof(*slab_page)); + + slab_page->page_addr = nvgpu_alloc(&a->source_allocator, a->page_size); + if (!slab_page->page_addr) { + kfree(slab_page); + palloc_dbg(a, "OOM: vidmem is full!\n"); + return ERR_PTR(-ENOMEM); + } + + INIT_LIST_HEAD(&slab_page->list_entry); + slab_page->slab_size = slab->slab_size; + slab_page->nr_objects = (u32)a->page_size / slab->slab_size; + slab_page->nr_objects_alloced = 0; + slab_page->owner = slab; + slab_page->state = SP_NONE; + + a->pages_alloced++; + + palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n", + slab_page->page_addr, slab_page->slab_size); + + return slab_page; +} + +static void free_slab_page(struct nvgpu_page_allocator *a, + struct page_alloc_slab_page *slab_page) +{ + palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr); + + BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) || + slab_page->nr_objects_alloced != 0 || + slab_page->bitmap != 0); + + nvgpu_free(&a->source_allocator, slab_page->page_addr); + a->pages_freed++; + + kmem_cache_free(page_alloc_slab_page_cache, slab_page); +} + +/* + * This expects @alloc to have 1 empty page_alloc_chunk already added to the + * alloc_chunks list. + */ +static int __do_slab_alloc(struct nvgpu_page_allocator *a, + struct page_alloc_slab *slab, + struct nvgpu_page_alloc *alloc) +{ + struct page_alloc_slab_page *slab_page = NULL; + struct page_alloc_chunk *chunk; + unsigned long offs; + + /* + * Check the partial and empty lists to see if we have some space + * readily available. Take the slab_page out of what ever list it + * was in since it may be put back into a different list later. + */ + if (!list_empty(&slab->partial)) { + slab_page = list_first_entry(&slab->partial, + struct page_alloc_slab_page, + list_entry); + del_slab_page_from_partial(slab, slab_page); + } else if (!list_empty(&slab->empty)) { + slab_page = list_first_entry(&slab->empty, + struct page_alloc_slab_page, + list_entry); + del_slab_page_from_empty(slab, slab_page); + } + + if (!slab_page) { + slab_page = alloc_slab_page(a, slab); + if (IS_ERR(slab_page)) + return PTR_ERR(slab_page); + } + + /* + * We now have a slab_page. Do the alloc. + */ + offs = bitmap_find_next_zero_area(&slab_page->bitmap, + slab_page->nr_objects, + 0, 1, 0); + if (offs >= slab_page->nr_objects) { + WARN(1, "Empty/partial slab with no free objects?"); + + /* Add the buggy page to the full list... This isn't ideal. 
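+ * At least the full list is never searched for free objects, so the
+ * corrupted page will not be handed out again.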
*/ + add_slab_page_to_full(slab, slab_page); + return -ENOMEM; + } + + bitmap_set(&slab_page->bitmap, offs, 1); + slab_page->nr_objects_alloced++; + + if (slab_page->nr_objects_alloced < slab_page->nr_objects) + add_slab_page_to_partial(slab, slab_page); + else if (slab_page->nr_objects_alloced == slab_page->nr_objects) + add_slab_page_to_full(slab, slab_page); + else + BUG(); /* Should be impossible to hit this. */ + + /* + * Handle building the nvgpu_page_alloc struct. We expect one + * page_alloc_chunk to be present. + */ + alloc->slab_page = slab_page; + alloc->nr_chunks = 1; + alloc->length = slab_page->slab_size; + alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); + + chunk = list_first_entry(&alloc->alloc_chunks, + struct page_alloc_chunk, list_entry); + chunk->base = alloc->base; + chunk->length = alloc->length; + + return 0; +} + +/* + * Allocate from a slab instead of directly from the page allocator. + */ +static struct nvgpu_page_alloc *__nvgpu_alloc_slab( + struct nvgpu_page_allocator *a, u64 len) +{ + int err, slab_nr; + struct page_alloc_slab *slab; + struct nvgpu_page_alloc *alloc = NULL; + struct page_alloc_chunk *chunk = NULL; + + /* + * Align the length to a page and then divide by the page size (4k for + * this code). ilog2() of that then gets us the correct slab to use. + */ + slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12); + slab = &a->slabs[slab_nr]; + + alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); + if (!alloc) { + palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); + goto fail; + } + chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); + if (!chunk) { + palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n"); + goto fail; + } + + INIT_LIST_HEAD(&alloc->alloc_chunks); + list_add(&chunk->list_entry, &alloc->alloc_chunks); + + err = __do_slab_alloc(a, slab, alloc); + if (err) + goto fail; + + palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n", + len, slab_nr, alloc->base); + a->nr_slab_allocs++; + + return alloc; + +fail: + kfree(alloc); + kfree(chunk); + return NULL; +} + +static void __nvgpu_free_slab(struct nvgpu_page_allocator *a, + struct nvgpu_page_alloc *alloc) +{ + struct page_alloc_slab_page *slab_page = alloc->slab_page; + struct page_alloc_slab *slab = slab_page->owner; + enum slab_page_state new_state; + int offs; + + offs = (u32)(alloc->base - slab_page->page_addr) / slab_page->slab_size; + bitmap_clear(&slab_page->bitmap, offs, 1); + + slab_page->nr_objects_alloced--; + + if (slab_page->nr_objects_alloced == 0) + new_state = SP_EMPTY; + else + new_state = SP_PARTIAL; + + /* + * Need to migrate the page to a different list. + */ + if (new_state != slab_page->state) { + /* Delete - can't be in empty. */ + if (slab_page->state == SP_PARTIAL) + del_slab_page_from_partial(slab, slab_page); + else + del_slab_page_from_full(slab, slab_page); + + /* And add. */ + if (new_state == SP_EMPTY) { + if (list_empty(&slab->empty)) + add_slab_page_to_empty(slab, slab_page); + else + free_slab_page(a, slab_page); + } else { + add_slab_page_to_partial(slab, slab_page); + } + } + + /* + * Now handle the page_alloc. + */ + __nvgpu_free_pages(a, alloc, false); + a->nr_slab_frees++; + + return; +} + +/* + * Allocate physical pages. Since the underlying allocator is a buddy allocator + * the returned pages are always contiguous. However, since there could be + * fragmentation in the space this allocator will collate smaller non-contiguous + * allocations together if necessary. 
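+ * For example (hypothetical numbers): a 256 KB request against a 64 KB
+ * page size may come back as one 256 KB chunk, two 128 KB chunks, or a
+ * mix down to single 64 KB pages, depending on what the underlying buddy
+ * allocator can supply; each piece is one page_alloc_chunk on the
+ * alloc_chunks list.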
+ */ +static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( + struct nvgpu_page_allocator *a, u64 pages) +{ + struct nvgpu_page_alloc *alloc; + struct page_alloc_chunk *c; + u64 max_chunk_len = pages << a->page_shift; + int i = 0; + + alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); + if (!alloc) + goto fail; + + memset(alloc, 0, sizeof(*alloc)); + + INIT_LIST_HEAD(&alloc->alloc_chunks); + alloc->length = pages << a->page_shift; + + while (pages) { + u64 chunk_addr = 0; + u64 chunk_pages = (u64)1 << __fls(pages); + u64 chunk_len = chunk_pages << a->page_shift; + + /* + * Take care of the possibility that the allocation must be + * contiguous. If this is not the first iteration then that + * means the first iteration failed to alloc the entire + * requested size. The buddy allocator guarantees any given + * single alloc is contiguous. + */ + if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0) + goto fail_cleanup; + + if (chunk_len > max_chunk_len) + chunk_len = max_chunk_len; + + /* + * Keep attempting to allocate in smaller chunks until the alloc + * either succeeds or is smaller than the page_size of the + * allocator (i.e the allocator is OOM). + */ + do { + chunk_addr = nvgpu_alloc(&a->source_allocator, + chunk_len); + + /* Divide by 2 and try again */ + if (!chunk_addr) { + palloc_dbg(a, "balloc failed: 0x%llx\n", + chunk_len); + chunk_len >>= 1; + max_chunk_len = chunk_len; + } + } while (!chunk_addr && chunk_len >= a->page_size); + + chunk_pages = chunk_len >> a->page_shift; + + if (!chunk_addr) { + palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len); + goto fail_cleanup; + } + + c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); + if (!c) { + nvgpu_free(&a->source_allocator, chunk_addr); + goto fail_cleanup; + } + + pages -= chunk_pages; + + c->base = chunk_addr; + c->length = chunk_len; + list_add(&c->list_entry, &alloc->alloc_chunks); + + i++; + } + + alloc->nr_chunks = i; + c = list_first_entry(&alloc->alloc_chunks, + struct page_alloc_chunk, list_entry); + alloc->base = c->base; + + return alloc; + +fail_cleanup: + while (!list_empty(&alloc->alloc_chunks)) { + c = list_first_entry(&alloc->alloc_chunks, + struct page_alloc_chunk, list_entry); + list_del(&c->list_entry); + nvgpu_free(&a->source_allocator, c->base); + kfree(c); + } + kfree(alloc); +fail: + return ERR_PTR(-ENOMEM); +} + +static struct nvgpu_page_alloc *__nvgpu_alloc_pages( + struct nvgpu_page_allocator *a, u64 len) +{ + struct nvgpu_page_alloc *alloc = NULL; + struct page_alloc_chunk *c; + u64 pages; + int i = 0; + + pages = ALIGN(len, a->page_size) >> a->page_shift; + + alloc = __do_nvgpu_alloc_pages(a, pages); + if (IS_ERR(alloc)) { + palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n", + pages << a->page_shift, pages); + return NULL; + } + + palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", + pages << a->page_shift, pages, alloc->base); + list_for_each_entry(c, &alloc->alloc_chunks, list_entry) { + palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", + i++, c->base, c->length); + } + + return alloc; +} + +/* + * Allocate enough pages to satisfy @len. Page size is determined at + * initialization of the allocator. + * + * The return is actually a pointer to a struct nvgpu_page_alloc pointer. This + * is because it doesn't make a lot of sense to return the address of the first + * page in the list of pages (since they could be discontiguous). This has + * precedent in the dma_alloc APIs, though, it's really just an annoying + * artifact of the fact that the nvgpu_alloc() API requires a u64 return type. 
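+ *
+ * A caller therefore typically does something like (illustrative only,
+ * names not taken from this patch):
+ *
+ *   u64 handle = nvgpu_alloc(na, len);
+ *   struct nvgpu_page_alloc *alloc =
+ *           (struct nvgpu_page_alloc *)(uintptr_t)handle;
+ *
+ * where na is the wrapping struct nvgpu_allocator. When
+ * GPU_ALLOC_NO_SCATTER_GATHER is set the returned u64 is instead the base
+ * address of the (necessarily contiguous) allocation.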
+ */ +static u64 nvgpu_page_alloc(struct nvgpu_allocator *__a, u64 len) +{ + struct nvgpu_page_allocator *a = page_allocator(__a); + struct nvgpu_page_alloc *alloc = NULL; + u64 real_len; + + /* + * If we want contig pages we have to round up to a power of two. It's + * easier to do that here than in the buddy allocator. + */ + real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ? + roundup_pow_of_two(len) : len; + + alloc_lock(__a); + if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES && + real_len <= (a->page_size / 2)) + alloc = __nvgpu_alloc_slab(a, real_len); + else + alloc = __nvgpu_alloc_pages(a, real_len); + + if (!alloc) { + alloc_unlock(__a); + return 0; + } + + __insert_page_alloc(a, alloc); + + a->nr_allocs++; + if (real_len > a->page_size / 2) + a->pages_alloced += alloc->length >> a->page_shift; + alloc_unlock(__a); + + if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) + return alloc->base; + else + return (u64) (uintptr_t) alloc; +} + +/* + * Note: this will remove the nvgpu_page_alloc struct from the RB tree + * if it's found. + */ +static void nvgpu_page_free(struct nvgpu_allocator *__a, u64 base) +{ + struct nvgpu_page_allocator *a = page_allocator(__a); + struct nvgpu_page_alloc *alloc; + + alloc_lock(__a); + + if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) + alloc = __find_page_alloc(a, base); + else + alloc = __find_page_alloc(a, + ((struct nvgpu_page_alloc *)(uintptr_t)base)->base); + + if (!alloc) { + palloc_dbg(a, "Hrm, found no alloc?\n"); + goto done; + } + + a->nr_frees++; + + palloc_dbg(a, "Free 0x%llx id=0x%010llx\n", + alloc->length, alloc->base); + + /* + * Frees *alloc. + */ + if (alloc->slab_page) { + __nvgpu_free_slab(a, alloc); + } else { + a->pages_freed += (alloc->length >> a->page_shift); + __nvgpu_free_pages(a, alloc, true); + } + +done: + alloc_unlock(__a); +} + +static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( + struct nvgpu_page_allocator *a, u64 base, u64 length) +{ + struct nvgpu_page_alloc *alloc; + struct page_alloc_chunk *c; + + alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); + c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); + if (!alloc || !c) + goto fail; + + alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length); + if (!alloc->base) { + WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base); + goto fail; + } + + alloc->nr_chunks = 1; + alloc->length = length; + INIT_LIST_HEAD(&alloc->alloc_chunks); + + c->base = alloc->base; + c->length = length; + list_add(&c->list_entry, &alloc->alloc_chunks); + + return alloc; + +fail: + kfree(c); + kfree(alloc); + return ERR_PTR(-ENOMEM); +} + +static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, + u64 base, u64 len) +{ + struct nvgpu_page_allocator *a = page_allocator(__a); + struct nvgpu_page_alloc *alloc = NULL; + struct page_alloc_chunk *c; + u64 aligned_len, pages; + int i = 0; + + aligned_len = ALIGN(len, a->page_size); + pages = aligned_len >> a->page_shift; + + alloc_lock(__a); + + alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len); + if (IS_ERR(alloc)) { + alloc_unlock(__a); + return 0; + } + + __insert_page_alloc(a, alloc); + alloc_unlock(__a); + + palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", + alloc->base, aligned_len, pages); + list_for_each_entry(c, &alloc->alloc_chunks, list_entry) { + palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", + i++, c->base, c->length); + } + + a->nr_fixed_allocs++; + a->pages_alloced += pages; + + if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) + return alloc->base; + else + return (u64) (uintptr_t) alloc; +} + 
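+/*
+ * Counterpart to nvgpu_page_alloc_fixed(). Note that @len is not used
+ * here: with GPU_ALLOC_NO_SCATTER_GATHER the allocation is looked up by
+ * @base in the outstanding-allocations tree, and otherwise @base is really
+ * the pointer to the nvgpu_page_alloc struct, which carries its own length.
+ */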
+static void nvgpu_page_free_fixed(struct nvgpu_allocator *__a, + u64 base, u64 len) +{ + struct nvgpu_page_allocator *a = page_allocator(__a); + struct nvgpu_page_alloc *alloc; + + alloc_lock(__a); + + if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) { + alloc = __find_page_alloc(a, base); + if (!alloc) + goto done; + } else { + alloc = (struct nvgpu_page_alloc *) (uintptr_t) base; + } + + palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n", + alloc->base, alloc->length); + + a->nr_fixed_frees++; + a->pages_freed += (alloc->length >> a->page_shift); + + /* + * This works for the time being since the buddy allocator + * uses the same free function for both fixed and regular + * allocs. This would have to be updated if the underlying + * allocator were to change. + */ + __nvgpu_free_pages(a, alloc, true); + +done: + alloc_unlock(__a); +} + +static void nvgpu_page_allocator_destroy(struct nvgpu_allocator *__a) +{ + struct nvgpu_page_allocator *a = page_allocator(__a); + + alloc_lock(__a); + kfree(a); + __a->priv = NULL; + alloc_unlock(__a); +} + +static void nvgpu_page_print_stats(struct nvgpu_allocator *__a, + struct seq_file *s, int lock) +{ + struct nvgpu_page_allocator *a = page_allocator(__a); + int i; + + if (lock) + alloc_lock(__a); + + __alloc_pstat(s, __a, "Page allocator:\n"); + __alloc_pstat(s, __a, " allocs %lld\n", a->nr_allocs); + __alloc_pstat(s, __a, " frees %lld\n", a->nr_frees); + __alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs); + __alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees); + __alloc_pstat(s, __a, " slab_allocs %lld\n", a->nr_slab_allocs); + __alloc_pstat(s, __a, " slab_frees %lld\n", a->nr_slab_frees); + __alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced); + __alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed); + __alloc_pstat(s, __a, "\n"); + + /* + * Slab info. + */ + if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) { + __alloc_pstat(s, __a, "Slabs:\n"); + __alloc_pstat(s, __a, " size empty partial full\n"); + __alloc_pstat(s, __a, " ---- ----- ------- ----\n"); + + for (i = 0; i < a->nr_slabs; i++) { + struct page_alloc_slab *slab = &a->slabs[i]; + + __alloc_pstat(s, __a, " %-9u %-9d %-9u %u\n", + slab->slab_size, + slab->nr_empty, slab->nr_partial, + slab->nr_full); + } + __alloc_pstat(s, __a, "\n"); + } + + __alloc_pstat(s, __a, "Source alloc: %s\n", + a->source_allocator.name); + nvgpu_alloc_print_stats(&a->source_allocator, s, lock); + + if (lock) + alloc_unlock(__a); +} + +static const struct nvgpu_allocator_ops page_ops = { + .alloc = nvgpu_page_alloc, + .free = nvgpu_page_free, + + .alloc_fixed = nvgpu_page_alloc_fixed, + .free_fixed = nvgpu_page_free_fixed, + + .reserve_carveout = nvgpu_page_reserve_co, + .release_carveout = nvgpu_page_release_co, + + .base = nvgpu_page_alloc_base, + .length = nvgpu_page_alloc_length, + .end = nvgpu_page_alloc_end, + .inited = nvgpu_page_alloc_inited, + .space = nvgpu_page_alloc_space, + + .fini = nvgpu_page_allocator_destroy, + + .print_stats = nvgpu_page_print_stats, +}; + +/* + * nr_slabs is computed as follows: divide page_size by 4096 to get number of + * 4k pages in page_size. Then take the base 2 log of that to get number of + * slabs. For 64k page_size that works on like: + * + * 1024*64 / 1024*4 = 16 + * ilog2(16) = 4 + * + * That gives buckets of 1, 2, 4, and 8 pages (i.e 4k, 8k, 16k, 32k). 
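+ *
+ * Each slab bucket then serves objects of size SZ_4K << i (see the init
+ * loop below); requests larger than page_size / 2 bypass the slabs and go
+ * straight to __nvgpu_alloc_pages().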
+ */ +static int nvgpu_page_alloc_init_slabs(struct nvgpu_page_allocator *a) +{ + size_t nr_slabs = ilog2(a->page_size >> 12); + unsigned int i; + + a->slabs = kcalloc(nr_slabs, + sizeof(struct page_alloc_slab), + GFP_KERNEL); + if (!a->slabs) + return -ENOMEM; + a->nr_slabs = nr_slabs; + + for (i = 0; i < nr_slabs; i++) { + struct page_alloc_slab *slab = &a->slabs[i]; + + slab->slab_size = SZ_4K * (1 << i); + INIT_LIST_HEAD(&slab->empty); + INIT_LIST_HEAD(&slab->partial); + INIT_LIST_HEAD(&slab->full); + slab->nr_empty = 0; + slab->nr_partial = 0; + slab->nr_full = 0; + } + + return 0; +} + +int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, + const char *name, u64 base, u64 length, + u64 blk_size, u64 flags) +{ + struct nvgpu_page_allocator *a; + char buddy_name[sizeof(__a->name)]; + int err; + + mutex_lock(&meta_data_cache_lock); + if (!page_alloc_cache) + page_alloc_cache = KMEM_CACHE(nvgpu_page_alloc, 0); + if (!page_alloc_chunk_cache) + page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0); + if (!page_alloc_slab_page_cache) + page_alloc_slab_page_cache = + KMEM_CACHE(page_alloc_slab_page, 0); + mutex_unlock(&meta_data_cache_lock); + + if (!page_alloc_cache || !page_alloc_chunk_cache) + return -ENOMEM; + + if (blk_size < SZ_4K) + return -EINVAL; + + a = kzalloc(sizeof(struct nvgpu_page_allocator), GFP_KERNEL); + if (!a) + return -ENOMEM; + + err = __nvgpu_alloc_common_init(__a, name, a, false, &page_ops); + if (err) + goto fail; + + a->base = base; + a->length = length; + a->page_size = blk_size; + a->page_shift = __ffs(blk_size); + a->allocs = RB_ROOT; + a->owner = __a; + a->flags = flags; + + if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) { + err = nvgpu_page_alloc_init_slabs(a); + if (err) + goto fail; + } + + snprintf(buddy_name, sizeof(buddy_name), "%s-src", name); + + err = nvgpu_buddy_allocator_init(g, &a->source_allocator, buddy_name, + base, length, blk_size, 0); + if (err) + goto fail; + + nvgpu_init_alloc_debug(g, __a); + palloc_dbg(a, "New allocator: type page\n"); + palloc_dbg(a, " base 0x%llx\n", a->base); + palloc_dbg(a, " size 0x%llx\n", a->length); + palloc_dbg(a, " page_size 0x%llx\n", a->page_size); + palloc_dbg(a, " flags 0x%llx\n", a->flags); + palloc_dbg(a, " slabs: %d\n", a->nr_slabs); + + return 0; + +fail: + kfree(a); + return err; +} diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index 0b90090a..07601d42 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c @@ -279,17 +279,17 @@ static int gk20a_as_ioctl_get_va_regions( for (i = 0; i < write_entries; ++i) { struct nvgpu_as_va_region region; - struct gk20a_allocator *vma = - gk20a_alloc_initialized(&vm->fixed) ? + struct nvgpu_allocator *vma = + nvgpu_alloc_initialized(&vm->fixed) ? &vm->fixed : &vm->vma[i]; memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); region.page_size = vm->gmmu_page_sizes[i]; - region.offset = gk20a_alloc_base(vma); + region.offset = nvgpu_alloc_base(vma); /* No __aeabi_uldivmod() on some platforms... 
*/ - region.pages = (gk20a_alloc_end(vma) - - gk20a_alloc_base(vma)) >> ilog2(region.page_size); + region.pages = (nvgpu_alloc_end(vma) - + nvgpu_alloc_base(vma)) >> ilog2(region.page_size); if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) return -EFAULT; diff --git a/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h deleted file mode 100644 index a686b704..00000000 --- a/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef BITMAP_ALLOCATOR_PRIV_H -#define BITMAP_ALLOCATOR_PRIV_H - -#include - -struct gk20a_allocator; - -struct gk20a_bitmap_allocator { - struct gk20a_allocator *owner; - - u64 base; /* Base address of the space. */ - u64 length; /* Length of the space. */ - u64 blk_size; /* Size that corresponds to 1 bit. */ - u64 blk_shift; /* Bit shift to divide by blk_size. */ - u64 num_bits; /* Number of allocatable bits. */ - u64 bit_offs; /* Offset of bitmap. */ - - /* - * Optimization for making repeated allocations faster. Keep track of - * the next bit after the most recent allocation. This is where the next - * search will start from. This should make allocation faster in cases - * where lots of allocations get made one after another. It shouldn't - * have a negative impact on the case where the allocator is fragmented. - */ - u64 next_blk; - - unsigned long *bitmap; /* The actual bitmap! */ - struct rb_root allocs; /* Tree of outstanding allocations. */ - - u64 flags; - - bool inited; - - /* Statistics */ - u64 nr_allocs; - u64 nr_fixed_allocs; - u64 bytes_alloced; - u64 bytes_freed; -}; - -struct gk20a_bitmap_alloc { - u64 base; - u64 length; - struct rb_node alloc_entry; /* RB tree of allocations. */ -}; - -static inline struct gk20a_bitmap_allocator *bitmap_allocator( - struct gk20a_allocator *a) -{ - return (struct gk20a_bitmap_allocator *)(a)->priv; -} - - -#endif diff --git a/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h deleted file mode 100644 index bb8b307b..00000000 --- a/drivers/gpu/nvgpu/gk20a/buddy_allocator_priv.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef BUDDY_ALLOCATOR_PRIV_H -#define BUDDY_ALLOCATOR_PRIV_H - -#include -#include - -struct gk20a_allocator; -struct vm_gk20a; - -/* - * Each buddy is an element in a binary tree. - */ -struct gk20a_buddy { - struct gk20a_buddy *parent; /* Parent node. */ - struct gk20a_buddy *buddy; /* This node's buddy. */ - struct gk20a_buddy *left; /* Lower address sub-node. */ - struct gk20a_buddy *right; /* Higher address sub-node. */ - - struct list_head buddy_entry; /* List entry for various lists. */ - struct rb_node alloced_entry; /* RB tree of allocations. */ - - u64 start; /* Start address of this buddy. */ - u64 end; /* End address of this buddy. */ - u64 order; /* Buddy order. */ - -#define BALLOC_BUDDY_ALLOCED 0x1 -#define BALLOC_BUDDY_SPLIT 0x2 -#define BALLOC_BUDDY_IN_LIST 0x4 - int flags; /* List of associated flags. */ - - /* - * Size of the PDE this buddy is using. This allows for grouping like - * sized allocations into the same PDE. This uses the gmmu_pgsz_gk20a - * enum except for the BALLOC_PTE_SIZE_ANY specifier. - */ -#define BALLOC_PTE_SIZE_ANY -1 - int pte_size; -}; - -#define __buddy_flag_ops(flag, flag_up) \ - static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \ - { \ - return b->flags & BALLOC_BUDDY_ ## flag_up; \ - } \ - static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \ - { \ - b->flags |= BALLOC_BUDDY_ ## flag_up; \ - } \ - static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \ - { \ - b->flags &= ~BALLOC_BUDDY_ ## flag_up; \ - } - -/* - * int buddy_is_alloced(struct gk20a_buddy *b); - * void buddy_set_alloced(struct gk20a_buddy *b); - * void buddy_clr_alloced(struct gk20a_buddy *b); - * - * int buddy_is_split(struct gk20a_buddy *b); - * void buddy_set_split(struct gk20a_buddy *b); - * void buddy_clr_split(struct gk20a_buddy *b); - * - * int buddy_is_in_list(struct gk20a_buddy *b); - * void buddy_set_in_list(struct gk20a_buddy *b); - * void buddy_clr_in_list(struct gk20a_buddy *b); - */ -__buddy_flag_ops(alloced, ALLOCED); -__buddy_flag_ops(split, SPLIT); -__buddy_flag_ops(in_list, IN_LIST); - -/* - * Keeps info for a fixed allocation. - */ -struct gk20a_fixed_alloc { - struct list_head buddies; /* List of buddies. */ - struct rb_node alloced_entry; /* RB tree of fixed allocations. */ - - u64 start; /* Start of fixed block. */ - u64 end; /* End address. */ -}; - -/* - * GPU buddy allocator for the various GPU address spaces. Each addressable unit - * doesn't have to correspond to a byte. In some cases each unit is a more - * complex object such as a comp_tag line or the like. - * - * The max order is computed based on the size of the minimum order and the size - * of the address space. - * - * order_size is the size of an order 0 buddy. - */ -struct gk20a_buddy_allocator { - struct gk20a_allocator *owner; /* Owner of this buddy allocator. */ - struct vm_gk20a *vm; /* Parent VM - can be NULL. */ - - u64 base; /* Base address of the space. */ - u64 length; /* Length of the space. */ - u64 blk_size; /* Size of order 0 allocation. */ - u64 blk_shift; /* Shift to divide by blk_size. */ - - /* Internal stuff. */ - u64 start; /* Real start (aligned to blk_size). */ - u64 end; /* Real end, trimmed if needed. */ - u64 count; /* Count of objects in space. */ - u64 blks; /* Count of blks in the space. */ - u64 max_order; /* Specific maximum order. */ - - struct rb_root alloced_buddies; /* Outstanding allocations. */ - struct rb_root fixed_allocs; /* Outstanding fixed allocations. 
*/ - - struct list_head co_list; - - /* - * Impose an upper bound on the maximum order. - */ -#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1) - - struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN]; - u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN]; - u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN]; - u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN]; - - /* - * This is for when the allocator is managing a GVA space (the - * GPU_ALLOC_GVA_SPACE bit is set in @flags). This requires - * that we group like sized allocations into PDE blocks. - */ - u64 pte_blk_order; - - int initialized; - int alloc_made; /* True after the first alloc. */ - - u64 flags; - - u64 bytes_alloced; - u64 bytes_alloced_real; - u64 bytes_freed; -}; - -static inline struct gk20a_buddy_allocator *buddy_allocator( - struct gk20a_allocator *a) -{ - return (struct gk20a_buddy_allocator *)(a)->priv; -} - -static inline struct list_head *balloc_get_order_list( - struct gk20a_buddy_allocator *a, int order) -{ - return &a->buddy_list[order]; -} - -static inline u64 balloc_order_to_len(struct gk20a_buddy_allocator *a, - int order) -{ - return (1 << order) * a->blk_size; -} - -static inline u64 balloc_base_shift(struct gk20a_buddy_allocator *a, - u64 base) -{ - return base - a->start; -} - -static inline u64 balloc_base_unshift(struct gk20a_buddy_allocator *a, - u64 base) -{ - return base + a->start; -} - -static inline struct gk20a_allocator *balloc_owner( - struct gk20a_buddy_allocator *a) -{ - return a->owner; -} - -#endif diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 136c28d0..be01e0e9 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -976,7 +976,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub)); gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem); - nvgpu_free(ch->gpfifo.pipe); + nvgpu_kfree(ch->gpfifo.pipe); memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); #if defined(CONFIG_GK20A_CYCLE_STATS) @@ -1778,7 +1778,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, } if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) { - c->gpfifo.pipe = nvgpu_alloc( + c->gpfifo.pipe = nvgpu_kalloc( gpfifo_size * sizeof(struct nvgpu_gpfifo), false); if (!c->gpfifo.pipe) { @@ -1850,7 +1850,7 @@ clean_up_sync: c->sync = NULL; } clean_up_unmap: - nvgpu_free(c->gpfifo.pipe); + nvgpu_kfree(c->gpfifo.pipe); gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem); clean_up: memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); @@ -1980,12 +1980,12 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c, if (!g) { size = count * sizeof(struct nvgpu_gpfifo); if (size) { - g = nvgpu_alloc(size, false); + g = nvgpu_kalloc(size, false); if (!g) return; if (copy_from_user(g, user_gpfifo, size)) { - nvgpu_free(g); + nvgpu_kfree(g); return; } } @@ -1997,7 +1997,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c, trace_write_pushbuffer(c, gp); if (gpfifo_allocated) - nvgpu_free(g); + nvgpu_kfree(g); } static void gk20a_channel_timeout_start(struct channel_gk20a *ch, diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 0a0d94b7..697d1603 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -143,7 +143,7 @@ struct channel_gk20a { struct list_head ch_entry; /* channel's entry in TSG */ struct channel_gk20a_joblist joblist; - struct 
gk20a_allocator fence_allocator; + struct nvgpu_allocator fence_allocator; struct vm_gk20a *vm; diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index e5529295..ac96036f 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -815,7 +815,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s, goto fail_dmabuf_put; } - buffer = nvgpu_alloc(access_limit_size, true); + buffer = nvgpu_kalloc(access_limit_size, true); if (!buffer) { err = -ENOMEM; goto fail_dmabuf_put; @@ -861,7 +861,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s, fail_idle: gk20a_idle(g->dev); fail_free_buffer: - nvgpu_free(buffer); + nvgpu_kfree(buffer); fail_dmabuf_put: dma_buf_put(dmabuf); diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c index b84db933..8fa108c2 100644 --- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c @@ -477,7 +477,7 @@ void gk20a_debug_init(struct device *dev, const char *debugfs_symlink) gk20a_railgating_debugfs_init(g->dev); gk20a_cde_debugfs_init(g->dev); gk20a_ce_debugfs_init(g->dev); - gk20a_alloc_debugfs_init(g->dev); + nvgpu_alloc_debugfs_init(g->dev); gk20a_mm_debugfs_init(g->dev); gk20a_fifo_debugfs_init(g->dev); gk20a_sched_debugfs_init(g->dev); diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index 323caa8f..b8a1dcbc 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c @@ -49,8 +49,8 @@ static void gk20a_fence_free(struct kref *ref) gk20a_semaphore_put(f->semaphore); if (f->allocator) { - if (gk20a_alloc_initialized(f->allocator)) - gk20a_free(f->allocator, (size_t)f); + if (nvgpu_alloc_initialized(f->allocator)) + nvgpu_free(f->allocator, (size_t)f); } else kfree(f); } @@ -129,7 +129,7 @@ int gk20a_alloc_fence_pool(struct channel_gk20a *c, unsigned int count) if (!fence_pool) return -ENOMEM; - err = gk20a_lockless_allocator_init(c->g, &c->fence_allocator, + err = nvgpu_lockless_allocator_init(c->g, &c->fence_allocator, "fence_pool", (size_t)fence_pool, size, sizeof(struct gk20a_fence), 0); if (err) @@ -144,11 +144,11 @@ fail: void gk20a_free_fence_pool(struct channel_gk20a *c) { - if (gk20a_alloc_initialized(&c->fence_allocator)) { + if (nvgpu_alloc_initialized(&c->fence_allocator)) { void *base = (void *)(uintptr_t) - gk20a_alloc_base(&c->fence_allocator); + nvgpu_alloc_base(&c->fence_allocator); - gk20a_alloc_destroy(&c->fence_allocator); + nvgpu_alloc_destroy(&c->fence_allocator); vfree(base); } } @@ -158,9 +158,9 @@ struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c) struct gk20a_fence *fence = NULL; if (channel_gk20a_is_prealloc_enabled(c)) { - if (gk20a_alloc_initialized(&c->fence_allocator)) { + if (nvgpu_alloc_initialized(&c->fence_allocator)) { fence = (struct gk20a_fence *)(uintptr_t) - gk20a_alloc(&c->fence_allocator, + nvgpu_alloc(&c->fence_allocator, sizeof(struct gk20a_fence)); /* clear the node and reset the allocator pointer */ diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h index beba761a..f38fcbe7 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h @@ -47,7 +47,7 @@ struct gk20a_fence { u32 syncpt_value; /* Valid for fences part of a pre-allocated fence pool */ - struct gk20a_allocator *allocator; + struct nvgpu_allocator *allocator; }; /* Fences can be created 
from semaphores or syncpoint (id, value) pairs */ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index b1e90bd8..753f031a 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -43,6 +43,8 @@ #include #include +#include + #include "gk20a.h" #include "nvgpu_common.h" #include "debug_gk20a.h" @@ -60,7 +62,6 @@ #include "gk20a_scale.h" #include "ctxsw_trace_gk20a.h" #include "dbg_gpu_gk20a.h" -#include "gk20a_allocator.h" #include "hal.h" #include "vgpu/vgpu.h" #include "pci.h" diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c deleted file mode 100644 index 3129b07c..00000000 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c +++ /dev/null @@ -1,211 +0,0 @@ -/* - * gk20a allocator - * - * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include "gk20a.h" -#include "mm_gk20a.h" -#include "platform_gk20a.h" -#include "gk20a_allocator.h" - -u32 gk20a_alloc_tracing_on; - -u64 gk20a_alloc_length(struct gk20a_allocator *a) -{ - if (a->ops->length) - return a->ops->length(a); - - return 0; -} - -u64 gk20a_alloc_base(struct gk20a_allocator *a) -{ - if (a->ops->base) - return a->ops->base(a); - - return 0; -} - -u64 gk20a_alloc_initialized(struct gk20a_allocator *a) -{ - if (!a->ops || !a->ops->inited) - return 0; - - return a->ops->inited(a); -} - -u64 gk20a_alloc_end(struct gk20a_allocator *a) -{ - if (a->ops->end) - return a->ops->end(a); - - return 0; -} - -u64 gk20a_alloc_space(struct gk20a_allocator *a) -{ - if (a->ops->space) - return a->ops->space(a); - - return 0; -} - -u64 gk20a_alloc(struct gk20a_allocator *a, u64 len) -{ - return a->ops->alloc(a, len); -} - -void gk20a_free(struct gk20a_allocator *a, u64 addr) -{ - a->ops->free(a, addr); -} - -u64 gk20a_alloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) -{ - if (a->ops->alloc_fixed) - return a->ops->alloc_fixed(a, base, len); - - return 0; -} - -void gk20a_free_fixed(struct gk20a_allocator *a, u64 base, u64 len) -{ - /* - * If this operation is not defined for the allocator then just do - * nothing. The alternative would be to fall back on the regular - * free but that may be harmful in unexpected ways. - */ - if (a->ops->free_fixed) - a->ops->free_fixed(a, base, len); -} - -int gk20a_alloc_reserve_carveout(struct gk20a_allocator *a, - struct gk20a_alloc_carveout *co) -{ - if (a->ops->reserve_carveout) - return a->ops->reserve_carveout(a, co); - - return -ENODEV; -} - -void gk20a_alloc_release_carveout(struct gk20a_allocator *a, - struct gk20a_alloc_carveout *co) -{ - if (a->ops->release_carveout) - a->ops->release_carveout(a, co); -} - -void gk20a_alloc_destroy(struct gk20a_allocator *a) -{ - a->ops->fini(a); - memset(a, 0, sizeof(*a)); -} - -/* - * Handle the common init stuff for a gk20a_allocator. 
- */ -int __gk20a_alloc_common_init(struct gk20a_allocator *a, - const char *name, void *priv, bool dbg, - const struct gk20a_allocator_ops *ops) -{ - if (!ops) - return -EINVAL; - - /* - * This is the bare minimum operations required for a sensible - * allocator. - */ - if (!ops->alloc || !ops->free || !ops->fini) - return -EINVAL; - - a->ops = ops; - a->priv = priv; - a->debug = dbg; - - mutex_init(&a->lock); - - strlcpy(a->name, name, sizeof(a->name)); - - return 0; -} - -void gk20a_alloc_print_stats(struct gk20a_allocator *__a, - struct seq_file *s, int lock) -{ - __a->ops->print_stats(__a, s, lock); -} - -#ifdef CONFIG_DEBUG_FS -static int __alloc_show(struct seq_file *s, void *unused) -{ - struct gk20a_allocator *a = s->private; - - gk20a_alloc_print_stats(a, s, 1); - - return 0; -} - -static int __alloc_open(struct inode *inode, struct file *file) -{ - return single_open(file, __alloc_show, inode->i_private); -} - -static const struct file_operations __alloc_fops = { - .open = __alloc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - -void gk20a_init_alloc_debug(struct gk20a *g, struct gk20a_allocator *a) -{ -#ifdef CONFIG_DEBUG_FS - if (!g->debugfs_allocators) - return; - - a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, - g->debugfs_allocators, - a, &__alloc_fops); -#endif -} - -void gk20a_fini_alloc_debug(struct gk20a_allocator *a) -{ -#ifdef CONFIG_DEBUG_FS - if (!IS_ERR_OR_NULL(a->debugfs_entry)) - debugfs_remove(a->debugfs_entry); -#endif -} - -void gk20a_alloc_debugfs_init(struct device *dev) -{ -#ifdef CONFIG_DEBUG_FS - struct gk20a_platform *platform = dev_get_drvdata(dev); - struct dentry *gpu_root = platform->debugfs; - struct gk20a *g = get_gk20a(dev); - - g->debugfs_allocators = debugfs_create_dir("allocators", gpu_root); - if (IS_ERR_OR_NULL(g->debugfs_allocators)) - return; - - debugfs_create_u32("tracing", 0664, g->debugfs_allocators, - &gk20a_alloc_tracing_on); -#endif -} diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h deleted file mode 100644 index b12926b3..00000000 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef GK20A_ALLOCATOR_H -#define GK20A_ALLOCATOR_H - -#include -#include -#include - -/* #define ALLOCATOR_DEBUG */ - -struct gk20a_allocator; -struct gk20a_alloc_carveout; -struct vm_gk20a; -struct gk20a; - -/* - * Operations for an allocator to implement. - */ -struct gk20a_allocator_ops { - u64 (*alloc)(struct gk20a_allocator *allocator, u64 len); - void (*free)(struct gk20a_allocator *allocator, u64 addr); - - /* - * Special interface to allocate a memory region with a specific - * starting address. Yikes. Note: if free() works for freeing both - * regular and fixed allocations then free_fixed() does not need to - * be implemented. 
This behavior exists for legacy reasons and should - * not be propagated to new allocators. - */ - u64 (*alloc_fixed)(struct gk20a_allocator *allocator, - u64 base, u64 len); - void (*free_fixed)(struct gk20a_allocator *allocator, - u64 base, u64 len); - - /* - * Allow allocators to reserve space for carveouts. - */ - int (*reserve_carveout)(struct gk20a_allocator *allocator, - struct gk20a_alloc_carveout *co); - void (*release_carveout)(struct gk20a_allocator *allocator, - struct gk20a_alloc_carveout *co); - - /* - * Returns info about the allocator. - */ - u64 (*base)(struct gk20a_allocator *allocator); - u64 (*length)(struct gk20a_allocator *allocator); - u64 (*end)(struct gk20a_allocator *allocator); - int (*inited)(struct gk20a_allocator *allocator); - u64 (*space)(struct gk20a_allocator *allocator); - - /* Destructor. */ - void (*fini)(struct gk20a_allocator *allocator); - - /* Debugging. */ - void (*print_stats)(struct gk20a_allocator *allocator, - struct seq_file *s, int lock); -}; - -struct gk20a_allocator { - char name[32]; - struct mutex lock; - - void *priv; - const struct gk20a_allocator_ops *ops; - - struct dentry *debugfs_entry; - bool debug; /* Control for debug msgs. */ -}; - -struct gk20a_alloc_carveout { - const char *name; - u64 base; - u64 length; - - struct gk20a_allocator *allocator; - - /* - * For usage by the allocator implementation. - */ - struct list_head co_entry; -}; - -#define GK20A_CARVEOUT(__name, __base, __length) \ - { \ - .name = (__name), \ - .base = (__base), \ - .length = (__length) \ - } - -/* - * These are the available allocator flags. - * - * GPU_ALLOC_GVA_SPACE - * - * This flag makes sense for the buddy allocator only. It specifies that the - * allocator will be used for managing a GVA space. When managing GVA spaces - * special care has to be taken to ensure that allocations of similar PTE - * sizes are placed in the same PDE block. This allows the higher level - * code to skip defining both small and large PTE tables for every PDE. That - * can save considerable memory for address spaces that have a lot of - * allocations. - * - * GPU_ALLOC_NO_ALLOC_PAGE - * - * For any allocator that needs to manage a resource in a latency critical - * path this flag specifies that the allocator should not use any kmalloc() - * or similar functions during normal operation. Initialization routines - * may still use kmalloc(). This prevents the possibility of long waits for - * pages when using alloc_page(). Currently only the bitmap allocator - * implements this functionality. - * - * Also note that if you accept this flag then you must also define the - * free_fixed() function. Since no meta-data is allocated to help free - * allocations you need to keep track of the meta-data yourself (in this - * case the base and length of the allocation as opposed to just the base - * of the allocation). - * - * GPU_ALLOC_4K_VIDMEM_PAGES - * - * We manage vidmem pages at a large page granularity for performance - * reasons; however, this can lead to wasting memory. For page allocators - * setting this flag will tell the allocator to manage pools of 4K pages - * inside internally allocated large pages. - * - * Currently this flag is ignored since the only usage of the page allocator - * uses a 4K block size already. However, this flag has been reserved since - * it will be necessary in the future. - * - * GPU_ALLOC_FORCE_CONTIG - * - * Force allocations to be contiguous. Currently only relevant for page - * allocators since all other allocators are naturally contiguous. 
- * - * GPU_ALLOC_NO_SCATTER_GATHER - * - * The page allocator normally returns a scatter gather data structure for - * allocations (to handle discontiguous pages). However, at times that can - * be annoying so this flag forces the page allocator to return a u64 - * pointing to the allocation base (requires GPU_ALLOC_FORCE_CONTIG to be - * set as well). - */ -#define GPU_ALLOC_GVA_SPACE 0x1 -#define GPU_ALLOC_NO_ALLOC_PAGE 0x2 -#define GPU_ALLOC_4K_VIDMEM_PAGES 0x4 -#define GPU_ALLOC_FORCE_CONTIG 0x8 -#define GPU_ALLOC_NO_SCATTER_GATHER 0x10 - -static inline void alloc_lock(struct gk20a_allocator *a) -{ - mutex_lock(&a->lock); -} - -static inline void alloc_unlock(struct gk20a_allocator *a) -{ - mutex_unlock(&a->lock); -} - -/* - * Buddy allocator specific initializers. - */ -int __gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *a, - struct vm_gk20a *vm, const char *name, - u64 base, u64 size, u64 blk_size, - u64 max_order, u64 flags); -int gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *a, - const char *name, u64 base, u64 size, - u64 blk_size, u64 flags); - -/* - * Bitmap initializers. - */ -int gk20a_bitmap_allocator_init(struct gk20a *g, struct gk20a_allocator *a, - const char *name, u64 base, u64 length, - u64 blk_size, u64 flags); - -/* - * Page allocator initializers. - */ -int gk20a_page_allocator_init(struct gk20a *g, struct gk20a_allocator *a, - const char *name, u64 base, u64 length, - u64 blk_size, u64 flags); - -/* - * Lockless allocatior initializers. - * Note: This allocator can only allocate fixed-size structures of a - * pre-defined size. - */ -int gk20a_lockless_allocator_init(struct gk20a *g, struct gk20a_allocator *a, - const char *name, u64 base, u64 length, - u64 struct_size, u64 flags); - -#define GPU_BALLOC_MAX_ORDER 31 - -/* - * Allocator APIs. - */ -u64 gk20a_alloc(struct gk20a_allocator *allocator, u64 len); -void gk20a_free(struct gk20a_allocator *allocator, u64 addr); - -u64 gk20a_alloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len); -void gk20a_free_fixed(struct gk20a_allocator *allocator, u64 base, u64 len); - -int gk20a_alloc_reserve_carveout(struct gk20a_allocator *a, - struct gk20a_alloc_carveout *co); -void gk20a_alloc_release_carveout(struct gk20a_allocator *a, - struct gk20a_alloc_carveout *co); - -u64 gk20a_alloc_base(struct gk20a_allocator *a); -u64 gk20a_alloc_length(struct gk20a_allocator *a); -u64 gk20a_alloc_end(struct gk20a_allocator *a); -u64 gk20a_alloc_initialized(struct gk20a_allocator *a); -u64 gk20a_alloc_space(struct gk20a_allocator *a); - -void gk20a_alloc_destroy(struct gk20a_allocator *allocator); - -void gk20a_alloc_print_stats(struct gk20a_allocator *a, - struct seq_file *s, int lock); - -/* - * Common functionality for the internals of the allocators. - */ -void gk20a_init_alloc_debug(struct gk20a *g, struct gk20a_allocator *a); -void gk20a_fini_alloc_debug(struct gk20a_allocator *a); - -int __gk20a_alloc_common_init(struct gk20a_allocator *a, - const char *name, void *priv, bool dbg, - const struct gk20a_allocator_ops *ops); - -static inline void gk20a_alloc_enable_dbg(struct gk20a_allocator *a) -{ - a->debug = true; -} - -static inline void gk20a_alloc_disable_dbg(struct gk20a_allocator *a) -{ - a->debug = false; -} - -/* - * Debug stuff. 
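A sketch of the typical call sequence against this interface (carried forward under the nvgpu_ prefix by this patch). The name, base and size values below are placeholders and the SZ_* constants come from linux/sizes.h; treat it as an illustration rather than driver code.

static int example_buddy_usage(struct gk20a *g)
{
	struct gk20a_allocator al;
	u64 addr;
	int err;

	/* A 1MB space of 4K blocks; flags 0, so no GVA handling. */
	err = gk20a_buddy_allocator_init(g, &al, "example", SZ_4K, SZ_1M,
					 SZ_4K, 0);
	if (err)
		return err;

	addr = gk20a_alloc(&al, SZ_64K);
	if (!addr) {			/* 0 signals failure */
		gk20a_alloc_destroy(&al);
		return -ENOMEM;
	}

	gk20a_free(&al, addr);
	gk20a_alloc_destroy(&al);

	return 0;
}

Fixed allocations and carveouts follow the same pattern through gk20a_alloc_fixed() and gk20a_alloc_reserve_carveout().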
- */ -extern u32 gk20a_alloc_tracing_on; - -void gk20a_alloc_debugfs_init(struct device *dev); - -#define gk20a_alloc_trace_func() \ - do { \ - if (gk20a_alloc_tracing_on) \ - trace_printk("%s\n", __func__); \ - } while (0) - -#define gk20a_alloc_trace_func_done() \ - do { \ - if (gk20a_alloc_tracing_on) \ - trace_printk("%s_done\n", __func__); \ - } while (0) - -#define __alloc_pstat(seq, allocator, fmt, arg...) \ - do { \ - if (s) \ - seq_printf(seq, fmt, ##arg); \ - else \ - alloc_dbg(allocator, fmt, ##arg); \ - } while (0) - -#define __alloc_dbg(a, fmt, arg...) \ - pr_info("%-25s %25s() " fmt, (a)->name, __func__, ##arg) - -#if defined(ALLOCATOR_DEBUG) -/* - * Always print the debug messages... - */ -#define alloc_dbg(a, fmt, arg...) __alloc_dbg(a, fmt, ##arg) -#else -/* - * Only print debug messages if debug is enabled for a given allocator. - */ -#define alloc_dbg(a, fmt, arg...) \ - do { \ - if ((a)->debug) \ - __alloc_dbg((a), fmt, ##arg); \ - } while (0) - -#endif - -#endif /* GK20A_ALLOCATOR_H */ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c deleted file mode 100644 index f98e0782..00000000 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - -#include "gk20a_allocator.h" -#include "bitmap_allocator_priv.h" - -static struct kmem_cache *meta_data_cache; /* slab cache for meta data. */ -static DEFINE_MUTEX(meta_data_cache_lock); - -static u64 gk20a_bitmap_alloc_length(struct gk20a_allocator *a) -{ - struct gk20a_bitmap_allocator *ba = a->priv; - - return ba->length; -} - -static u64 gk20a_bitmap_alloc_base(struct gk20a_allocator *a) -{ - struct gk20a_bitmap_allocator *ba = a->priv; - - return ba->base; -} - -static int gk20a_bitmap_alloc_inited(struct gk20a_allocator *a) -{ - struct gk20a_bitmap_allocator *ba = a->priv; - int inited = ba->inited; - - rmb(); - return inited; -} - -static u64 gk20a_bitmap_alloc_end(struct gk20a_allocator *a) -{ - struct gk20a_bitmap_allocator *ba = a->priv; - - return ba->base + ba->length; -} - -static u64 gk20a_bitmap_alloc_fixed(struct gk20a_allocator *__a, - u64 base, u64 len) -{ - struct gk20a_bitmap_allocator *a = bitmap_allocator(__a); - u64 blks, offs, ret; - - /* Compute the bit offset and make sure it's aligned to a block. */ - offs = base >> a->blk_shift; - if (offs * a->blk_size != base) - return 0; - - offs -= a->bit_offs; - - blks = len >> a->blk_shift; - if (blks * a->blk_size != len) - blks++; - - alloc_lock(__a); - - /* Check if the space requested is already occupied. 
*/ - ret = bitmap_find_next_zero_area(a->bitmap, a->num_bits, offs, blks, 0); - if (ret != offs) - goto fail; - - bitmap_set(a->bitmap, offs, blks); - - a->bytes_alloced += blks * a->blk_size; - a->nr_fixed_allocs++; - alloc_unlock(__a); - - alloc_dbg(__a, "Alloc-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n", - base, len, blks, blks); - return base; - -fail: - alloc_unlock(__a); - alloc_dbg(__a, "Alloc-fixed failed! (0x%llx)\n", base); - return 0; -} - -/* - * Two possibilities for this function: either we are freeing a fixed allocation - * or we are freeing a regular alloc but with GPU_ALLOC_NO_ALLOC_PAGE defined. - * - * Note: this function won't do much error checking. Thus you could really - * confuse the allocator if you misuse this function. - */ -static void gk20a_bitmap_free_fixed(struct gk20a_allocator *__a, - u64 base, u64 len) -{ - struct gk20a_bitmap_allocator *a = bitmap_allocator(__a); - u64 blks, offs; - - offs = base >> a->blk_shift; - if (WARN_ON(offs * a->blk_size != base)) - return; - - offs -= a->bit_offs; - - blks = len >> a->blk_shift; - if (blks * a->blk_size != len) - blks++; - - alloc_lock(__a); - bitmap_clear(a->bitmap, offs, blks); - a->bytes_freed += blks * a->blk_size; - alloc_unlock(__a); - - alloc_dbg(__a, "Free-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n", - base, len, blks, blks); -} - -/* - * Add the passed alloc to the tree of stored allocations. - */ -static void insert_alloc_metadata(struct gk20a_bitmap_allocator *a, - struct gk20a_bitmap_alloc *alloc) -{ - struct rb_node **new = &a->allocs.rb_node; - struct rb_node *parent = NULL; - struct gk20a_bitmap_alloc *tmp; - - while (*new) { - tmp = container_of(*new, struct gk20a_bitmap_alloc, - alloc_entry); - - parent = *new; - if (alloc->base < tmp->base) - new = &((*new)->rb_left); - else if (alloc->base > tmp->base) - new = &((*new)->rb_right); - else { - WARN_ON("Duplicate entries in RB alloc tree!\n"); - return; - } - } - - rb_link_node(&alloc->alloc_entry, parent, new); - rb_insert_color(&alloc->alloc_entry, &a->allocs); -} - -/* - * Find and remove meta-data from the outstanding allocations. - */ -static struct gk20a_bitmap_alloc *find_alloc_metadata( - struct gk20a_bitmap_allocator *a, u64 addr) -{ - struct rb_node *node = a->allocs.rb_node; - struct gk20a_bitmap_alloc *alloc; - - while (node) { - alloc = container_of(node, struct gk20a_bitmap_alloc, - alloc_entry); - - if (addr < alloc->base) - node = node->rb_left; - else if (addr > alloc->base) - node = node->rb_right; - else - break; - } - - if (!node) - return NULL; - - rb_erase(node, &a->allocs); - - return alloc; -} - -/* - * Tree of alloc meta data stores the address of the alloc not the bit offset. - */ -static int __gk20a_bitmap_store_alloc(struct gk20a_bitmap_allocator *a, - u64 addr, u64 len) -{ - struct gk20a_bitmap_alloc *alloc = - kmem_cache_alloc(meta_data_cache, GFP_KERNEL); - - if (!alloc) - return -ENOMEM; - - alloc->base = addr; - alloc->length = len; - - insert_alloc_metadata(a, alloc); - - return 0; -} - -/* - * @len is in bytes. This routine will figure out the right number of bits to - * actually allocate. The return is the address in bytes as well. - */ -static u64 gk20a_bitmap_alloc(struct gk20a_allocator *__a, u64 len) -{ - u64 blks, addr; - unsigned long offs, adjusted_offs, limit; - struct gk20a_bitmap_allocator *a = bitmap_allocator(__a); - - blks = len >> a->blk_shift; - - if (blks * a->blk_size != len) - blks++; - - alloc_lock(__a); - - /* - * First look from next_blk and onwards... 
- */ - offs = bitmap_find_next_zero_area(a->bitmap, a->num_bits, - a->next_blk, blks, 0); - if (offs >= a->num_bits) { - /* - * If that didn't work try the remaining area. Since there can - * be available space that spans across a->next_blk we need to - * search up to the first set bit after that. - */ - limit = find_next_bit(a->bitmap, a->num_bits, a->next_blk); - offs = bitmap_find_next_zero_area(a->bitmap, limit, - 0, blks, 0); - if (offs >= a->next_blk) - goto fail; - } - - bitmap_set(a->bitmap, offs, blks); - a->next_blk = offs + blks; - - adjusted_offs = offs + a->bit_offs; - addr = ((u64)adjusted_offs) * a->blk_size; - - /* - * Only do meta-data storage if we are allowed to allocate storage for - * that meta-data. The issue with using kmalloc() and friends is that - * in latency and success critical paths an alloc_page() call can either - * sleep for potentially a long time or, assuming GFP_ATOMIC, fail. - * Since we might not want either of these possibilities assume that the - * caller will keep what data it needs around to successfully free this - * allocation. - */ - if (!(a->flags & GPU_ALLOC_NO_ALLOC_PAGE) && - __gk20a_bitmap_store_alloc(a, addr, blks * a->blk_size)) - goto fail_reset_bitmap; - - alloc_dbg(__a, "Alloc 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n", - addr, len, blks, blks); - - a->nr_allocs++; - a->bytes_alloced += (blks * a->blk_size); - alloc_unlock(__a); - - return addr; - -fail_reset_bitmap: - bitmap_clear(a->bitmap, offs, blks); -fail: - a->next_blk = 0; - alloc_unlock(__a); - alloc_dbg(__a, "Alloc failed!\n"); - return 0; -} - -static void gk20a_bitmap_free(struct gk20a_allocator *__a, u64 addr) -{ - struct gk20a_bitmap_allocator *a = bitmap_allocator(__a); - struct gk20a_bitmap_alloc *alloc = NULL; - u64 offs, adjusted_offs, blks; - - alloc_lock(__a); - - if (a->flags & GPU_ALLOC_NO_ALLOC_PAGE) { - WARN(1, "Using wrong free for NO_ALLOC_PAGE bitmap allocator"); - goto done; - } - - alloc = find_alloc_metadata(a, addr); - if (!alloc) - goto done; - - /* - * Address comes from adjusted offset (i.e the bit offset with - * a->bit_offs added. So start with that and then work out the real - * offs into the bitmap. - */ - adjusted_offs = addr >> a->blk_shift; - offs = adjusted_offs - a->bit_offs; - blks = alloc->length >> a->blk_shift; - - bitmap_clear(a->bitmap, offs, blks); - alloc_dbg(__a, "Free 0x%-10llx\n", addr); - - a->bytes_freed += alloc->length; - -done: - kfree(alloc); - alloc_unlock(__a); -} - -static void gk20a_bitmap_alloc_destroy(struct gk20a_allocator *__a) -{ - struct gk20a_bitmap_allocator *a = bitmap_allocator(__a); - struct gk20a_bitmap_alloc *alloc; - struct rb_node *node; - - /* - * Kill any outstanding allocations. - */ - while ((node = rb_first(&a->allocs)) != NULL) { - alloc = container_of(node, struct gk20a_bitmap_alloc, - alloc_entry); - - rb_erase(node, &a->allocs); - kfree(alloc); - } - - kfree(a->bitmap); - kfree(a); -} - -static void gk20a_bitmap_print_stats(struct gk20a_allocator *__a, - struct seq_file *s, int lock) -{ - struct gk20a_bitmap_allocator *a = bitmap_allocator(__a); - - __alloc_pstat(s, __a, "Bitmap allocator params:\n"); - __alloc_pstat(s, __a, " start = 0x%llx\n", a->base); - __alloc_pstat(s, __a, " end = 0x%llx\n", a->base + a->length); - __alloc_pstat(s, __a, " blks = 0x%llx\n", a->num_bits); - - /* Actual stats. 
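The length and address conversions used throughout this allocator can be checked in isolation. Below is a standalone sketch (plain userspace C, not kernel code) of the round-up-to-blocks and bit-offset-to-address math, with placeholder numbers.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t blk_size = 4096, blk_shift = 12;
	uint64_t base = 0x100000, bit_offs = base >> blk_shift;
	uint64_t len = 10000;
	uint64_t blks, offs, addr;

	/* Round a byte length up to whole blocks, as the alloc paths do. */
	blks = len >> blk_shift;
	if (blks * blk_size != len)
		blks++;

	/* Turn a bit found by the bitmap search back into a byte address. */
	offs = 5;
	addr = (offs + bit_offs) * blk_size;

	printf("blks=%llu addr=0x%llx\n",
	       (unsigned long long)blks, (unsigned long long)addr);
	return 0;
}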
*/ - __alloc_pstat(s, __a, "Stats:\n"); - __alloc_pstat(s, __a, " Number allocs = 0x%llx\n", a->nr_allocs); - __alloc_pstat(s, __a, " Number fixed = 0x%llx\n", a->nr_fixed_allocs); - __alloc_pstat(s, __a, " Bytes alloced = 0x%llx\n", a->bytes_alloced); - __alloc_pstat(s, __a, " Bytes freed = 0x%llx\n", a->bytes_freed); - __alloc_pstat(s, __a, " Outstanding = 0x%llx\n", - a->bytes_alloced - a->bytes_freed); -} - -static const struct gk20a_allocator_ops bitmap_ops = { - .alloc = gk20a_bitmap_alloc, - .free = gk20a_bitmap_free, - - .alloc_fixed = gk20a_bitmap_alloc_fixed, - .free_fixed = gk20a_bitmap_free_fixed, - - .base = gk20a_bitmap_alloc_base, - .length = gk20a_bitmap_alloc_length, - .end = gk20a_bitmap_alloc_end, - .inited = gk20a_bitmap_alloc_inited, - - .fini = gk20a_bitmap_alloc_destroy, - - .print_stats = gk20a_bitmap_print_stats, -}; - - -int gk20a_bitmap_allocator_init(struct gk20a *g, struct gk20a_allocator *__a, - const char *name, u64 base, u64 length, - u64 blk_size, u64 flags) -{ - int err; - struct gk20a_bitmap_allocator *a; - - mutex_lock(&meta_data_cache_lock); - if (!meta_data_cache) - meta_data_cache = KMEM_CACHE(gk20a_bitmap_alloc, 0); - mutex_unlock(&meta_data_cache_lock); - - if (!meta_data_cache) - return -ENOMEM; - - if (WARN_ON(blk_size & (blk_size - 1))) - return -EINVAL; - - /* - * blk_size must be a power-of-2; base length also need to be aligned - * to blk_size. - */ - if (blk_size & (blk_size - 1) || - base & (blk_size - 1) || length & (blk_size - 1)) - return -EINVAL; - - if (base == 0) { - base = blk_size; - length -= blk_size; - } - - a = kzalloc(sizeof(struct gk20a_bitmap_allocator), GFP_KERNEL); - if (!a) - return -ENOMEM; - - err = __gk20a_alloc_common_init(__a, name, a, false, &bitmap_ops); - if (err) - goto fail; - - a->base = base; - a->length = length; - a->blk_size = blk_size; - a->blk_shift = __ffs(a->blk_size); - a->num_bits = length >> a->blk_shift; - a->bit_offs = a->base >> a->blk_shift; - a->flags = flags; - - a->bitmap = kcalloc(BITS_TO_LONGS(a->num_bits), sizeof(*a->bitmap), - GFP_KERNEL); - if (!a->bitmap) - goto fail; - - wmb(); - a->inited = true; - - gk20a_init_alloc_debug(g, __a); - alloc_dbg(__a, "New allocator: type bitmap\n"); - alloc_dbg(__a, " base 0x%llx\n", a->base); - alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs); - alloc_dbg(__a, " size 0x%llx\n", a->length); - alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); - alloc_dbg(__a, " flags 0x%llx\n", a->flags); - - return 0; - -fail: - kfree(a); - return err; -} diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c deleted file mode 100644 index 3715e9f8..00000000 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_buddy.c +++ /dev/null @@ -1,1327 +0,0 @@ -/* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include - -#include "mm_gk20a.h" -#include "platform_gk20a.h" -#include "gk20a_allocator.h" -#include "buddy_allocator_priv.h" - -static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ - -/* Some other buddy allocator functions. */ -static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, - u64 addr); -static void balloc_coalesce(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b); -static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, - struct gk20a_fixed_alloc *falloc); - -/* - * This function is not present in older kernel's list.h code. - */ -#ifndef list_last_entry -#define list_last_entry(ptr, type, member) \ - list_entry((ptr)->prev, type, member) -#endif - -/* - * GPU buddy allocator for various address spaces. - * - * Current limitations: - * o A fixed allocation could potentially be made that borders PDEs with - * different PTE sizes. This would require that fixed buffer to have - * different sized PTEs for different parts of the allocation. Probably - * best to just require PDE alignment for fixed address allocs. - * - * o It is currently possible to make an allocator that has a buddy alignment - * out of sync with the PDE block size alignment. A simple example is a - * 32GB address space starting at byte 1. Every buddy is shifted off by 1 - * which means each buddy corresponf to more than one actual GPU page. The - * best way to fix this is probably just require PDE blocksize alignment - * for the start of the address space. At the moment all allocators are - * easily PDE aligned so this hasn't been a problem. - */ - -/* - * Pick a suitable maximum order for this allocator. - * - * Hueristic: Just guessing that the best max order is the largest single - * block that will fit in the address space. - */ -static void balloc_compute_max_order(struct gk20a_buddy_allocator *a) -{ - u64 true_max_order = ilog2(a->blks); - - if (a->max_order == 0) { - a->max_order = true_max_order; - return; - } - - if (a->max_order > true_max_order) - a->max_order = true_max_order; - if (a->max_order > GPU_BALLOC_MAX_ORDER) - a->max_order = GPU_BALLOC_MAX_ORDER; -} - -/* - * Since we can only allocate in chucks of a->blk_size we need to trim off - * any excess data that is not aligned to a->blk_size. - */ -static void balloc_allocator_align(struct gk20a_buddy_allocator *a) -{ - a->start = ALIGN(a->base, a->blk_size); - WARN_ON(a->start != a->base); - a->end = (a->base + a->length) & ~(a->blk_size - 1); - a->count = a->end - a->start; - a->blks = a->count >> a->blk_shift; -} - -/* - * Pass NULL for parent if you want a top level buddy. - */ -static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *parent, - u64 start, u64 order) -{ - struct gk20a_buddy *new_buddy; - - new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL); - if (!new_buddy) - return NULL; - - memset(new_buddy, 0, sizeof(struct gk20a_buddy)); - - new_buddy->parent = parent; - new_buddy->start = start; - new_buddy->order = order; - new_buddy->end = start + (1 << order) * a->blk_size; - new_buddy->pte_size = BALLOC_PTE_SIZE_ANY; - - return new_buddy; -} - -static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b, - struct list_head *list) -{ - if (buddy_is_in_list(b)) { - alloc_dbg(balloc_owner(a), - "Oops: adding added buddy (%llu:0x%llx)\n", - b->order, b->start); - BUG(); - } - - /* - * Add big PTE blocks to the tail, small to the head for GVA spaces. 
- * This lets the code that checks if there are available blocks check - * without cycling through the entire list. - */ - if (a->flags & GPU_ALLOC_GVA_SPACE && - b->pte_size == gmmu_page_size_big) - list_add_tail(&b->buddy_entry, list); - else - list_add(&b->buddy_entry, list); - - buddy_set_in_list(b); -} - -static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b) -{ - if (!buddy_is_in_list(b)) { - alloc_dbg(balloc_owner(a), - "Oops: removing removed buddy (%llu:0x%llx)\n", - b->order, b->start); - BUG(); - } - - list_del_init(&b->buddy_entry); - buddy_clr_in_list(b); -} - -/* - * Add a buddy to one of the buddy lists and deal with the necessary - * book keeping. Adds the buddy to the list specified by the buddy's order. - */ -static void balloc_blist_add(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b) -{ - __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); - a->buddy_list_len[b->order]++; -} - -static void balloc_blist_rem(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b) -{ - __balloc_buddy_list_rem(a, b); - a->buddy_list_len[b->order]--; -} - -static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len) -{ - if (len == 0) - return 0; - - len--; - len >>= a->blk_shift; - - return fls(len); -} - -static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a, - u64 start, u64 end) -{ - u64 size = (end - start) >> a->blk_shift; - - if (size > 0) - return min_t(u64, ilog2(size), a->max_order); - else - return GPU_BALLOC_MAX_ORDER; -} - -/* - * Initialize the buddy lists. - */ -static int balloc_init_lists(struct gk20a_buddy_allocator *a) -{ - int i; - u64 bstart, bend, order; - struct gk20a_buddy *buddy; - - bstart = a->start; - bend = a->end; - - /* First make sure the LLs are valid. */ - for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) - INIT_LIST_HEAD(balloc_get_order_list(a, i)); - - while (bstart < bend) { - order = __balloc_max_order_in(a, bstart, bend); - - buddy = balloc_new_buddy(a, NULL, bstart, order); - if (!buddy) - goto cleanup; - - balloc_blist_add(a, buddy); - bstart += balloc_order_to_len(a, order); - } - - return 0; - -cleanup: - for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { - if (!list_empty(balloc_get_order_list(a, i))) { - buddy = list_first_entry(balloc_get_order_list(a, i), - struct gk20a_buddy, buddy_entry); - balloc_blist_rem(a, buddy); - kmem_cache_free(buddy_cache, buddy); - } - } - - return -ENOMEM; -} - -/* - * Clean up and destroy the passed allocator. - */ -static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a) -{ - int i; - struct rb_node *node; - struct gk20a_buddy *bud; - struct gk20a_fixed_alloc *falloc; - struct gk20a_buddy_allocator *a = __a->priv; - - alloc_lock(__a); - - gk20a_fini_alloc_debug(__a); - - /* - * Free the fixed allocs first. - */ - while ((node = rb_first(&a->fixed_allocs)) != NULL) { - falloc = container_of(node, - struct gk20a_fixed_alloc, alloced_entry); - - rb_erase(node, &a->fixed_allocs); - __balloc_do_free_fixed(a, falloc); - } - - /* - * And now free all outstanding allocations. - */ - while ((node = rb_first(&a->alloced_buddies)) != NULL) { - bud = container_of(node, struct gk20a_buddy, alloced_entry); - balloc_free_buddy(a, bud->start); - balloc_blist_add(a, bud); - balloc_coalesce(a, bud); - } - - /* - * Now clean up the unallocated buddies. 
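The order computation in balloc_get_order() above is small enough to check standalone. A userspace sketch follows, using a GCC builtin for a 64-bit fls() and placeholder sizes.

#include <stdint.h>
#include <stdio.h>

static uint64_t get_order(uint64_t len, uint64_t blk_shift)
{
	if (len == 0)
		return 0;
	len--;
	len >>= blk_shift;
	return len ? 64 - __builtin_clzll(len) : 0;	/* fls() */
}

int main(void)
{
	uint64_t blk_shift = 12;	/* 4K order-0 blocks */

	/* 4K fits order 0, 4K + 1 byte needs order 1, 64K needs order 4. */
	printf("%llu %llu %llu\n",
	       (unsigned long long)get_order(4096, blk_shift),
	       (unsigned long long)get_order(4097, blk_shift),
	       (unsigned long long)get_order(65536, blk_shift));
	return 0;
}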
- */ - for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) { - BUG_ON(a->buddy_list_alloced[i] != 0); - - while (!list_empty(balloc_get_order_list(a, i))) { - bud = list_first_entry(balloc_get_order_list(a, i), - struct gk20a_buddy, buddy_entry); - balloc_blist_rem(a, bud); - kmem_cache_free(buddy_cache, bud); - } - - if (a->buddy_list_len[i] != 0) { - pr_info("Excess buddies!!! (%d: %llu)\n", - i, a->buddy_list_len[i]); - BUG(); - } - if (a->buddy_list_split[i] != 0) { - pr_info("Excess split nodes!!! (%d: %llu)\n", - i, a->buddy_list_split[i]); - BUG(); - } - if (a->buddy_list_alloced[i] != 0) { - pr_info("Excess alloced nodes!!! (%d: %llu)\n", - i, a->buddy_list_alloced[i]); - BUG(); - } - } - - kfree(a); - - alloc_unlock(__a); -} - -/* - * Combine the passed buddy if possible. The pointer in @b may not be valid - * after this as the buddy may be freed. - * - * @a must be locked. - */ -static void balloc_coalesce(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b) -{ - struct gk20a_buddy *parent; - - if (buddy_is_alloced(b) || buddy_is_split(b)) - return; - - /* - * If both our buddy and I are both not allocated and not split then - * we can coalesce ourselves. - */ - if (!b->buddy) - return; - if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy)) - return; - - parent = b->parent; - - balloc_blist_rem(a, b); - balloc_blist_rem(a, b->buddy); - - buddy_clr_split(parent); - a->buddy_list_split[parent->order]--; - balloc_blist_add(a, parent); - - /* - * Recursively coalesce as far as we can go. - */ - balloc_coalesce(a, parent); - - /* Clean up the remains. */ - kmem_cache_free(buddy_cache, b->buddy); - kmem_cache_free(buddy_cache, b); -} - -/* - * Split a buddy into two new buddies who are 1/2 the size of the parent buddy. - * - * @a must be locked. - */ -static int balloc_split_buddy(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b, int pte_size) -{ - struct gk20a_buddy *left, *right; - u64 half; - - left = balloc_new_buddy(a, b, b->start, b->order - 1); - if (!left) - return -ENOMEM; - - half = (b->end - b->start) / 2; - - right = balloc_new_buddy(a, b, b->start + half, b->order - 1); - if (!right) { - kmem_cache_free(buddy_cache, left); - return -ENOMEM; - } - - buddy_set_split(b); - a->buddy_list_split[b->order]++; - - b->left = left; - b->right = right; - left->buddy = right; - right->buddy = left; - left->parent = b; - right->parent = b; - - /* PTE considerations. */ - if (a->flags & GPU_ALLOC_GVA_SPACE && - left->order <= a->pte_blk_order) { - left->pte_size = pte_size; - right->pte_size = pte_size; - } - - balloc_blist_rem(a, b); - balloc_blist_add(a, left); - balloc_blist_add(a, right); - - return 0; -} - -/* - * Place the passed buddy into the RB tree for allocated buddies. Never fails - * unless the passed entry is a duplicate which is a bug. - * - * @a must be locked. 
- */ -static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a, - struct gk20a_buddy *b) -{ - struct rb_node **new = &(a->alloced_buddies.rb_node); - struct rb_node *parent = NULL; - - while (*new) { - struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy, - alloced_entry); - - parent = *new; - if (b->start < bud->start) - new = &((*new)->rb_left); - else if (b->start > bud->start) - new = &((*new)->rb_right); - else - BUG_ON("Duplicate entries in allocated list!\n"); - } - - rb_link_node(&b->alloced_entry, parent, new); - rb_insert_color(&b->alloced_entry, &a->alloced_buddies); - - buddy_set_alloced(b); - a->buddy_list_alloced[b->order]++; -} - -/* - * Remove the passed buddy from the allocated buddy RB tree. Returns the - * deallocated buddy for further processing. - * - * @a must be locked. - */ -static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, - u64 addr) -{ - struct rb_node *node = a->alloced_buddies.rb_node; - struct gk20a_buddy *bud; - - while (node) { - bud = container_of(node, struct gk20a_buddy, alloced_entry); - - if (addr < bud->start) - node = node->rb_left; - else if (addr > bud->start) - node = node->rb_right; - else - break; - } - - if (!node) - return NULL; - - rb_erase(node, &a->alloced_buddies); - buddy_clr_alloced(bud); - a->buddy_list_alloced[bud->order]--; - - return bud; -} - -/* - * Find a suitable buddy for the given order and PTE type (big or little). - */ -static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a, - u64 order, int pte_size) -{ - struct gk20a_buddy *bud; - - if (order > a->max_order || - list_empty(balloc_get_order_list(a, order))) - return NULL; - - if (a->flags & GPU_ALLOC_GVA_SPACE && - pte_size == gmmu_page_size_big) - bud = list_last_entry(balloc_get_order_list(a, order), - struct gk20a_buddy, buddy_entry); - else - bud = list_first_entry(balloc_get_order_list(a, order), - struct gk20a_buddy, buddy_entry); - - if (bud->pte_size != BALLOC_PTE_SIZE_ANY && - bud->pte_size != pte_size) - return NULL; - - return bud; -} - -/* - * Allocate a suitably sized buddy. If no suitable buddy exists split higher - * order buddies until we have a suitable buddy to allocate. - * - * For PDE grouping add an extra check to see if a buddy is suitable: that the - * buddy exists in a PDE who's PTE size is reasonable - * - * @a must be locked. - */ -static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a, - u64 order, int pte_size) -{ - u64 split_order; - struct gk20a_buddy *bud = NULL; - - split_order = order; - while (split_order <= a->max_order && - !(bud = __balloc_find_buddy(a, split_order, pte_size))) - split_order++; - - /* Out of memory! */ - if (!bud) - return 0; - - while (bud->order != order) { - if (balloc_split_buddy(a, bud, pte_size)) - return 0; /* No mem... */ - bud = bud->left; - } - - balloc_blist_rem(a, bud); - balloc_alloc_buddy(a, bud); - - return bud->start; -} - -/* - * See if the passed range is actually available for allocation. If so, then - * return 1, otherwise return 0. - * - * TODO: Right now this uses the unoptimal approach of going through all - * outstanding allocations and checking their base/ends. This could be better. - */ -static int balloc_is_range_free(struct gk20a_buddy_allocator *a, - u64 base, u64 end) -{ - struct rb_node *node; - struct gk20a_buddy *bud; - - node = rb_first(&a->alloced_buddies); - if (!node) - return 1; /* No allocs yet. 
*/ - - bud = container_of(node, struct gk20a_buddy, alloced_entry); - - while (bud->start < end) { - if ((bud->start > base && bud->start < end) || - (bud->end > base && bud->end < end)) - return 0; - - node = rb_next(node); - if (!node) - break; - bud = container_of(node, struct gk20a_buddy, alloced_entry); - } - - return 1; -} - -static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a, - struct gk20a_fixed_alloc *f) -{ - struct rb_node **new = &(a->fixed_allocs.rb_node); - struct rb_node *parent = NULL; - - while (*new) { - struct gk20a_fixed_alloc *falloc = - container_of(*new, struct gk20a_fixed_alloc, - alloced_entry); - - BUG_ON(!virt_addr_valid(falloc)); - - parent = *new; - if (f->start < falloc->start) - new = &((*new)->rb_left); - else if (f->start > falloc->start) - new = &((*new)->rb_right); - else - BUG_ON("Duplicate entries in allocated list!\n"); - } - - rb_link_node(&f->alloced_entry, parent, new); - rb_insert_color(&f->alloced_entry, &a->fixed_allocs); -} - -/* - * Remove the passed buddy from the allocated buddy RB tree. Returns the - * deallocated buddy for further processing. - * - * @a must be locked. - */ -static struct gk20a_fixed_alloc *balloc_free_fixed( - struct gk20a_buddy_allocator *a, u64 addr) -{ - struct rb_node *node = a->fixed_allocs.rb_node; - struct gk20a_fixed_alloc *falloc; - - while (node) { - falloc = container_of(node, - struct gk20a_fixed_alloc, alloced_entry); - - if (addr < falloc->start) - node = node->rb_left; - else if (addr > falloc->start) - node = node->rb_right; - else - break; - } - - if (!node) - return NULL; - - rb_erase(node, &a->fixed_allocs); - - return falloc; -} - -/* - * Find the parent range - doesn't necessarily need the parent to actually exist - * as a buddy. Finding an existing parent comes later... - */ -static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a, - u64 base, u64 order, - u64 *pbase, u64 *porder) -{ - u64 base_mask; - u64 shifted_base = balloc_base_shift(a, base); - - order++; - base_mask = ~((a->blk_size << order) - 1); - - shifted_base &= base_mask; - - *pbase = balloc_base_unshift(a, shifted_base); - *porder = order; -} - -/* - * Makes a buddy at the passed address. This will make all parent buddies - * necessary for this buddy to exist as well. - */ -static struct gk20a_buddy *__balloc_make_fixed_buddy( - struct gk20a_buddy_allocator *a, u64 base, u64 order) -{ - struct gk20a_buddy *bud = NULL; - struct list_head *order_list; - u64 cur_order = order, cur_base = base; - - /* - * Algo: - * 1. Keep jumping up a buddy order until we find the real buddy that - * this buddy exists in. - * 2. Then work our way down through the buddy tree until we hit a dead - * end. - * 3. Start splitting buddies until we split to the one we need to - * make. - */ - while (cur_order <= a->max_order) { - int found = 0; - - order_list = balloc_get_order_list(a, cur_order); - list_for_each_entry(bud, order_list, buddy_entry) { - if (bud->start == cur_base) { - found = 1; - break; - } - } - - if (found) - break; - - __balloc_get_parent_range(a, cur_base, cur_order, - &cur_base, &cur_order); - } - - if (cur_order > a->max_order) { - alloc_dbg(balloc_owner(a), "No buddy for range ???\n"); - return NULL; - } - - /* Split this buddy as necessary until we get the target buddy. 
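The parent-range step of that walk can be illustrated on its own. A standalone sketch follows, assuming balloc_base_shift()/balloc_base_unshift() simply subtract and re-add the allocator's start (the addresses below are placeholders).

#include <stdint.h>
#include <stdio.h>

static void get_parent_range(uint64_t start, uint64_t blk_size,
			     uint64_t base, uint64_t order,
			     uint64_t *pbase, uint64_t *porder)
{
	uint64_t shifted = base - start;	/* balloc_base_shift() */

	order++;
	shifted &= ~((blk_size << order) - 1);	/* align to parent block */

	*pbase = shifted + start;		/* balloc_base_unshift() */
	*porder = order;
}

int main(void)
{
	uint64_t pbase, porder;

	/* 4K blocks, space at 1MB: the order-0 buddy at 1MB + 12K... */
	get_parent_range(0x100000, 4096, 0x103000, 0, &pbase, &porder);

	/* ...has its order-1 parent at 1MB + 8K. */
	printf("parent base=0x%llx order=%llu\n",
	       (unsigned long long)pbase, (unsigned long long)porder);
	return 0;
}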
*/ - while (bud->start != base || bud->order != order) { - if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) { - balloc_coalesce(a, bud); - return NULL; - } - - if (base < bud->right->start) - bud = bud->left; - else - bud = bud->right; - - } - - return bud; -} - -static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a, - struct gk20a_fixed_alloc *falloc, - u64 base, u64 len) -{ - u64 shifted_base, inc_base; - u64 align_order; - - shifted_base = balloc_base_shift(a, base); - if (shifted_base == 0) - align_order = __fls(len >> a->blk_shift); - else - align_order = min_t(u64, - __ffs(shifted_base >> a->blk_shift), - __fls(len >> a->blk_shift)); - - if (align_order > a->max_order) { - alloc_dbg(balloc_owner(a), - "Align order too big: %llu > %llu\n", - align_order, a->max_order); - return 0; - } - - /* - * Generate a list of buddies that satisfy this allocation. - */ - inc_base = shifted_base; - while (inc_base < (shifted_base + len)) { - u64 order_len = balloc_order_to_len(a, align_order); - u64 remaining; - struct gk20a_buddy *bud; - - bud = __balloc_make_fixed_buddy(a, - balloc_base_unshift(a, inc_base), - align_order); - if (!bud) { - alloc_dbg(balloc_owner(a), - "Fixed buddy failed: {0x%llx, %llu}!\n", - balloc_base_unshift(a, inc_base), - align_order); - goto err_and_cleanup; - } - - balloc_blist_rem(a, bud); - balloc_alloc_buddy(a, bud); - __balloc_buddy_list_add(a, bud, &falloc->buddies); - - /* Book keeping. */ - inc_base += order_len; - remaining = (shifted_base + len) - inc_base; - align_order = __ffs(inc_base >> a->blk_shift); - - /* If we don't have much left - trim down align_order. */ - if (balloc_order_to_len(a, align_order) > remaining) - align_order = __balloc_max_order_in(a, inc_base, - inc_base + remaining); - } - - return base; - -err_and_cleanup: - while (!list_empty(&falloc->buddies)) { - struct gk20a_buddy *bud = list_first_entry(&falloc->buddies, - struct gk20a_buddy, - buddy_entry); - - __balloc_buddy_list_rem(a, bud); - balloc_free_buddy(a, bud->start); - kmem_cache_free(buddy_cache, bud); - } - - return 0; -} - -static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, - struct gk20a_fixed_alloc *falloc) -{ - struct gk20a_buddy *bud; - - while (!list_empty(&falloc->buddies)) { - bud = list_first_entry(&falloc->buddies, - struct gk20a_buddy, - buddy_entry); - __balloc_buddy_list_rem(a, bud); - - balloc_free_buddy(a, bud->start); - balloc_blist_add(a, bud); - a->bytes_freed += balloc_order_to_len(a, bud->order); - - /* - * Attemp to defrag the allocation. - */ - balloc_coalesce(a, bud); - } - - kfree(falloc); -} - -/* - * Allocate memory from the passed allocator. - */ -static u64 gk20a_buddy_balloc(struct gk20a_allocator *__a, u64 len) -{ - u64 order, addr; - int pte_size; - struct gk20a_buddy_allocator *a = __a->priv; - - gk20a_alloc_trace_func(); - - alloc_lock(__a); - - order = balloc_get_order(a, len); - - if (order > a->max_order) { - alloc_unlock(__a); - alloc_dbg(balloc_owner(a), "Alloc fail\n"); - gk20a_alloc_trace_func_done(); - return 0; - } - - /* - * For now pass the base address of the allocator's region to - * __get_pte_size(). This ensures we get the right page size for - * the alloc but we don't have to know what the real address is - * going to be quite yet. - * - * TODO: once userspace supports a unified address space pass 0 for - * the base. This will make only 'len' affect the PTE size. 
- */ - if (a->flags & GPU_ALLOC_GVA_SPACE) - pte_size = __get_pte_size(a->vm, a->base, len); - else - pte_size = BALLOC_PTE_SIZE_ANY; - - addr = __balloc_do_alloc(a, order, pte_size); - - if (addr) { - a->bytes_alloced += len; - a->bytes_alloced_real += balloc_order_to_len(a, order); - alloc_dbg(balloc_owner(a), - "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", - addr, order, len, - pte_size == gmmu_page_size_big ? "big" : - pte_size == gmmu_page_size_small ? "small" : - "NA/any"); - } else { - alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n"); - } - - a->alloc_made = 1; - - alloc_unlock(__a); - - gk20a_alloc_trace_func_done(); - return addr; -} - -/* - * Requires @__a to be locked. - */ -static u64 __gk20a_balloc_fixed_buddy(struct gk20a_allocator *__a, - u64 base, u64 len) -{ - u64 ret, real_bytes = 0; - struct gk20a_buddy *bud; - struct gk20a_fixed_alloc *falloc = NULL; - struct gk20a_buddy_allocator *a = __a->priv; - - gk20a_alloc_trace_func(); - - /* If base isn't aligned to an order 0 block, fail. */ - if (base & (a->blk_size - 1)) - goto fail; - - if (len == 0) - goto fail; - - falloc = kmalloc(sizeof(*falloc), GFP_KERNEL); - if (!falloc) - goto fail; - - INIT_LIST_HEAD(&falloc->buddies); - falloc->start = base; - falloc->end = base + len; - - if (!balloc_is_range_free(a, base, base + len)) { - alloc_dbg(balloc_owner(a), - "Range not free: 0x%llx -> 0x%llx\n", - base, base + len); - goto fail_unlock; - } - - ret = __balloc_do_alloc_fixed(a, falloc, base, len); - if (!ret) { - alloc_dbg(balloc_owner(a), - "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", - base, base + len); - goto fail_unlock; - } - - balloc_alloc_fixed(a, falloc); - - list_for_each_entry(bud, &falloc->buddies, buddy_entry) - real_bytes += (bud->end - bud->start); - - a->bytes_alloced += len; - a->bytes_alloced_real += real_bytes; - - alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base); - - gk20a_alloc_trace_func_done(); - return base; - -fail_unlock: - alloc_unlock(__a); -fail: - kfree(falloc); - gk20a_alloc_trace_func_done(); - return 0; -} - -/* - * Allocate a fixed address allocation. The address of the allocation is @base - * and the length is @len. This is not a typical buddy allocator operation and - * as such has a high posibility of failure if the address space is heavily in - * use. - * - * Please do not use this function unless _absolutely_ necessary. - */ -static u64 gk20a_balloc_fixed_buddy(struct gk20a_allocator *__a, - u64 base, u64 len) -{ - u64 alloc; - struct gk20a_buddy_allocator *a = __a->priv; - - alloc_lock(__a); - alloc = __gk20a_balloc_fixed_buddy(__a, base, len); - a->alloc_made = 1; - alloc_unlock(__a); - - return alloc; -} - -/* - * Free the passed allocation. - */ -static void gk20a_buddy_bfree(struct gk20a_allocator *__a, u64 addr) -{ - struct gk20a_buddy *bud; - struct gk20a_fixed_alloc *falloc; - struct gk20a_buddy_allocator *a = __a->priv; - - gk20a_alloc_trace_func(); - - if (!addr) { - gk20a_alloc_trace_func_done(); - return; - } - - alloc_lock(__a); - - /* - * First see if this is a fixed alloc. If not fall back to a regular - * buddy. - */ - falloc = balloc_free_fixed(a, addr); - if (falloc) { - __balloc_do_free_fixed(a, falloc); - goto done; - } - - bud = balloc_free_buddy(a, addr); - if (!bud) - goto done; - - balloc_blist_add(a, bud); - a->bytes_freed += balloc_order_to_len(a, bud->order); - - /* - * Attemp to defrag the allocation. 
- */ - balloc_coalesce(a, bud); - -done: - alloc_unlock(__a); - alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr); - gk20a_alloc_trace_func_done(); - return; -} - -static bool gk20a_buddy_reserve_is_possible(struct gk20a_buddy_allocator *a, - struct gk20a_alloc_carveout *co) -{ - struct gk20a_alloc_carveout *tmp; - u64 co_base, co_end; - - co_base = co->base; - co_end = co->base + co->length; - - /* - * Not the fastest approach but we should not have that many carveouts - * for any reasonable allocator. - */ - list_for_each_entry(tmp, &a->co_list, co_entry) { - if ((co_base >= tmp->base && - co_base < (tmp->base + tmp->length)) || - (co_end >= tmp->base && - co_end < (tmp->base + tmp->length))) - return false; - } - - return true; -} - -/* - * Carveouts can only be reserved before any regular allocations have been - * made. - */ -static int gk20a_buddy_reserve_co(struct gk20a_allocator *__a, - struct gk20a_alloc_carveout *co) -{ - struct gk20a_buddy_allocator *a = __a->priv; - u64 addr; - int err = 0; - - if (co->base < a->start || (co->base + co->length) > a->end || - a->alloc_made) - return -EINVAL; - - alloc_lock(__a); - - if (!gk20a_buddy_reserve_is_possible(a, co)) { - err = -EBUSY; - goto done; - } - - /* Should not be possible to fail... */ - addr = __gk20a_balloc_fixed_buddy(__a, co->base, co->length); - if (!addr) { - err = -ENOMEM; - pr_warn("%s: Failed to reserve a valid carveout!\n", __func__); - goto done; - } - - list_add(&co->co_entry, &a->co_list); - -done: - alloc_unlock(__a); - return err; -} - -/* - * Carveouts can be release at any time. - */ -static void gk20a_buddy_release_co(struct gk20a_allocator *__a, - struct gk20a_alloc_carveout *co) -{ - alloc_lock(__a); - - list_del_init(&co->co_entry); - gk20a_free(__a, co->base); - - alloc_unlock(__a); -} - -static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a) -{ - struct gk20a_buddy_allocator *ba = a->priv; - - return ba->length; -} - -static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a) -{ - struct gk20a_buddy_allocator *ba = a->priv; - - return ba->start; -} - -static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a) -{ - struct gk20a_buddy_allocator *ba = a->priv; - int inited = ba->initialized; - - rmb(); - return inited; -} - -static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a) -{ - struct gk20a_buddy_allocator *ba = a->priv; - - return ba->end; -} - -static u64 gk20a_buddy_alloc_space(struct gk20a_allocator *a) -{ - struct gk20a_buddy_allocator *ba = a->priv; - u64 space; - - alloc_lock(a); - space = ba->end - ba->start - - (ba->bytes_alloced_real - ba->bytes_freed); - alloc_unlock(a); - - return space; -} - -/* - * Print the buddy allocator top level stats. If you pass @s as NULL then the - * stats are printed to the kernel log. This lets this code be used for - * debugging purposes internal to the allocator. 
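A short sketch of how a caller reserves one of these carveouts through the generic API declared earlier; the allocator pointer, name and sizes are placeholders, and SZ_* comes from linux/sizes.h.

static struct gk20a_alloc_carveout example_co =
	GK20A_CARVEOUT("example-co", SZ_1M, SZ_256K);

static int example_reserve(struct gk20a_allocator *a)
{
	int err;

	/* Must happen before any regular allocs are made from @a. */
	err = gk20a_alloc_reserve_carveout(a, &example_co);
	if (err)
		return err;	/* e.g. -EBUSY on overlap, -EINVAL if too late */

	/* ... the carveout range is now off-limits to gk20a_alloc() ... */

	gk20a_alloc_release_carveout(a, &example_co);
	return 0;
}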
- */ -static void gk20a_buddy_print_stats(struct gk20a_allocator *__a, - struct seq_file *s, int lock) -{ - int i = 0; - struct rb_node *node; - struct gk20a_fixed_alloc *falloc; - struct gk20a_alloc_carveout *tmp; - struct gk20a_buddy_allocator *a = __a->priv; - - __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n", - a->base, a->length, a->blk_size); - __alloc_pstat(s, __a, "Internal params:\n"); - __alloc_pstat(s, __a, " start = 0x%llx\n", a->start); - __alloc_pstat(s, __a, " end = 0x%llx\n", a->end); - __alloc_pstat(s, __a, " count = 0x%llx\n", a->count); - __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks); - __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order); - - if (lock) - alloc_lock(__a); - - if (!list_empty(&a->co_list)) { - __alloc_pstat(s, __a, "\n"); - __alloc_pstat(s, __a, "Carveouts:\n"); - list_for_each_entry(tmp, &a->co_list, co_entry) - __alloc_pstat(s, __a, - " CO %2d: %-20s 0x%010llx + 0x%llx\n", - i++, tmp->name, tmp->base, tmp->length); - } - - __alloc_pstat(s, __a, "\n"); - __alloc_pstat(s, __a, "Buddy blocks:\n"); - __alloc_pstat(s, __a, " Order Free Alloced Split\n"); - __alloc_pstat(s, __a, " ----- ---- ------- -----\n"); - - for (i = a->max_order; i >= 0; i--) { - if (a->buddy_list_len[i] == 0 && - a->buddy_list_alloced[i] == 0 && - a->buddy_list_split[i] == 0) - continue; - - __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i, - a->buddy_list_len[i], - a->buddy_list_alloced[i], - a->buddy_list_split[i]); - } - - __alloc_pstat(s, __a, "\n"); - - for (node = rb_first(&a->fixed_allocs), i = 1; - node != NULL; - node = rb_next(node)) { - falloc = container_of(node, - struct gk20a_fixed_alloc, alloced_entry); - - __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", - i, falloc->start, falloc->end); - } - - __alloc_pstat(s, __a, "\n"); - __alloc_pstat(s, __a, "Bytes allocated: %llu\n", - a->bytes_alloced); - __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n", - a->bytes_alloced_real); - __alloc_pstat(s, __a, "Bytes freed: %llu\n", - a->bytes_freed); - - if (lock) - alloc_unlock(__a); -} - -static const struct gk20a_allocator_ops buddy_ops = { - .alloc = gk20a_buddy_balloc, - .free = gk20a_buddy_bfree, - - .alloc_fixed = gk20a_balloc_fixed_buddy, - /* .free_fixed not needed. */ - - .reserve_carveout = gk20a_buddy_reserve_co, - .release_carveout = gk20a_buddy_release_co, - - .base = gk20a_buddy_alloc_base, - .length = gk20a_buddy_alloc_length, - .end = gk20a_buddy_alloc_end, - .inited = gk20a_buddy_alloc_inited, - .space = gk20a_buddy_alloc_space, - - .fini = gk20a_buddy_allocator_destroy, - - .print_stats = gk20a_buddy_print_stats, -}; - -/* - * Initialize a buddy allocator. Returns 0 on success. This allocator does - * not necessarily manage bytes. It manages distinct ranges of resources. This - * allows the allocator to work for things like comp_tags, semaphores, etc. - * - * @allocator: Ptr to an allocator struct to init. - * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to - * get PTE size for GVA spaces. - * @name: Name of the allocator. Doesn't have to be static storage. - * @base: The base address of the resource pool being managed. - * @size: Number of resources in the pool. - * @blk_size: Minimum number of resources to allocate at once. For things like - * semaphores this is 1. For GVA this might be as much as 64k. This - * corresponds to order 0. Must be power of 2. - * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator - * will try and pick a reasonable max order. 
- * @flags: Extra flags necessary. See GPU_BALLOC_*. - */ -int __gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *__a, - struct vm_gk20a *vm, const char *name, - u64 base, u64 size, u64 blk_size, - u64 max_order, u64 flags) -{ - int err; - u64 pde_size; - struct gk20a_buddy_allocator *a; - - /* blk_size must be greater than 0 and a power of 2. */ - if (blk_size == 0) - return -EINVAL; - if (blk_size & (blk_size - 1)) - return -EINVAL; - - if (max_order > GPU_BALLOC_MAX_ORDER) - return -EINVAL; - - /* If this is to manage a GVA space we need a VM. */ - if (flags & GPU_ALLOC_GVA_SPACE && !vm) - return -EINVAL; - - a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL); - if (!a) - return -ENOMEM; - - err = __gk20a_alloc_common_init(__a, name, a, false, &buddy_ops); - if (err) - goto fail; - - a->base = base; - a->length = size; - a->blk_size = blk_size; - a->blk_shift = __ffs(blk_size); - a->owner = __a; - - /* - * If base is 0 then modfy base to be the size of one block so that we - * can return errors by returning addr == 0. - */ - if (a->base == 0) { - a->base = a->blk_size; - a->length -= a->blk_size; - } - - a->vm = vm; - if (flags & GPU_ALLOC_GVA_SPACE) { - pde_size = ((u64)vm->big_page_size) << 10; - a->pte_blk_order = balloc_get_order(a, pde_size); - } - - /* - * When we have a GVA space with big_pages enabled the size and base - * must be PDE aligned. If big_pages are not enabled then this - * requirement is not necessary. - */ - if (flags & GPU_ALLOC_GVA_SPACE && vm->big_pages && - (base & ((vm->big_page_size << 10) - 1) || - size & ((vm->big_page_size << 10) - 1))) - return -EINVAL; - - a->flags = flags; - a->max_order = max_order; - - balloc_allocator_align(a); - balloc_compute_max_order(a); - - /* Shared buddy kmem_cache for all allocators. */ - if (!buddy_cache) - buddy_cache = KMEM_CACHE(gk20a_buddy, 0); - if (!buddy_cache) { - err = -ENOMEM; - goto fail; - } - - a->alloced_buddies = RB_ROOT; - a->fixed_allocs = RB_ROOT; - INIT_LIST_HEAD(&a->co_list); - err = balloc_init_lists(a); - if (err) - goto fail; - - wmb(); - a->initialized = 1; - - gk20a_init_alloc_debug(g, __a); - alloc_dbg(__a, "New allocator: type buddy\n"); - alloc_dbg(__a, " base 0x%llx\n", a->base); - alloc_dbg(__a, " size 0x%llx\n", a->length); - alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); - alloc_dbg(__a, " max_order %llu\n", a->max_order); - alloc_dbg(__a, " flags 0x%llx\n", a->flags); - - return 0; - -fail: - kfree(a); - return err; -} - -int gk20a_buddy_allocator_init(struct gk20a *g, struct gk20a_allocator *a, - const char *name, u64 base, u64 size, - u64 blk_size, u64 flags) -{ - return __gk20a_buddy_allocator_init(g, a, NULL, name, - base, size, blk_size, 0, 0); -} diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c deleted file mode 100644 index 5b011d8c..00000000 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include - -#include "gk20a_allocator.h" -#include "lockless_allocator_priv.h" - -static u64 gk20a_lockless_alloc_length(struct gk20a_allocator *a) -{ - struct gk20a_lockless_allocator *pa = a->priv; - - return pa->length; -} - -static u64 gk20a_lockless_alloc_base(struct gk20a_allocator *a) -{ - struct gk20a_lockless_allocator *pa = a->priv; - - return pa->base; -} - -static int gk20a_lockless_alloc_inited(struct gk20a_allocator *a) -{ - struct gk20a_lockless_allocator *pa = a->priv; - int inited = pa->inited; - - rmb(); - return inited; -} - -static u64 gk20a_lockless_alloc_end(struct gk20a_allocator *a) -{ - struct gk20a_lockless_allocator *pa = a->priv; - - return pa->base + pa->length; -} - -static u64 gk20a_lockless_alloc(struct gk20a_allocator *a, u64 len) -{ - struct gk20a_lockless_allocator *pa = a->priv; - int head, new_head, ret; - u64 addr = 0; - - if (len != pa->blk_size) - return 0; - - head = ACCESS_ONCE(pa->head); - while (head >= 0) { - new_head = ACCESS_ONCE(pa->next[head]); - ret = cmpxchg(&pa->head, head, new_head); - if (ret == head) { - addr = pa->base + head * pa->blk_size; - atomic_inc(&pa->nr_allocs); - alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head, - addr); - break; - } - head = ACCESS_ONCE(pa->head); - } - return addr; -} - -static void gk20a_lockless_free(struct gk20a_allocator *a, u64 addr) -{ - struct gk20a_lockless_allocator *pa = a->priv; - int head, ret; - u64 cur_idx, rem; - - cur_idx = addr - pa->base; - rem = do_div(cur_idx, pa->blk_size); - - while (1) { - head = ACCESS_ONCE(pa->head); - ACCESS_ONCE(pa->next[cur_idx]) = head; - ret = cmpxchg(&pa->head, head, cur_idx); - if (ret == head) { - atomic_dec(&pa->nr_allocs); - alloc_dbg(a, "Free node # %llu\n", cur_idx); - break; - } - } -} - -static void gk20a_lockless_alloc_destroy(struct gk20a_allocator *a) -{ - struct gk20a_lockless_allocator *pa = a->priv; - - gk20a_fini_alloc_debug(a); - - vfree(pa->next); - kfree(pa); -} - -static void gk20a_lockless_print_stats(struct gk20a_allocator *a, - struct seq_file *s, int lock) -{ - struct gk20a_lockless_allocator *pa = a->priv; - - __alloc_pstat(s, a, "Lockless allocator params:\n"); - __alloc_pstat(s, a, " start = 0x%llx\n", pa->base); - __alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length); - - /* Actual stats. */ - __alloc_pstat(s, a, "Stats:\n"); - __alloc_pstat(s, a, " Number allocs = %d\n", - atomic_read(&pa->nr_allocs)); - __alloc_pstat(s, a, " Number free = %d\n", - pa->nr_nodes - atomic_read(&pa->nr_allocs)); -} - -static const struct gk20a_allocator_ops pool_ops = { - .alloc = gk20a_lockless_alloc, - .free = gk20a_lockless_free, - - .base = gk20a_lockless_alloc_base, - .length = gk20a_lockless_alloc_length, - .end = gk20a_lockless_alloc_end, - .inited = gk20a_lockless_alloc_inited, - - .fini = gk20a_lockless_alloc_destroy, - - .print_stats = gk20a_lockless_print_stats, -}; - -int gk20a_lockless_allocator_init(struct gk20a *g, struct gk20a_allocator *__a, - const char *name, u64 base, u64 length, - u64 blk_size, u64 flags) -{ - int i; - int err; - int nr_nodes; - u64 count, rem; - struct gk20a_lockless_allocator *a; - - if (!blk_size) - return -EINVAL; - - /* - * Ensure we have space for atleast one node & there's no overflow. 
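The alloc/free scheme above is a lock-free stack of block indices: the free list lives in the next[] array and both paths swing head with a compare-and-swap, retrying on contention. A standalone C11 sketch of the same idea (userspace atomics instead of ACCESS_ONCE()/cmpxchg(), sizes are placeholders):

#include <stdatomic.h>
#include <stdio.h>

#define NR_NODES 4

static _Atomic int head;
static int next_idx[NR_NODES];

static int pop(void)
{
	int old = atomic_load(&head);

	/* A failed CAS reloads 'old' with the current head. */
	while (old >= 0 &&
	       !atomic_compare_exchange_weak(&head, &old, next_idx[old]))
		;
	return old;			/* -1 means the pool is empty */
}

static void push(int idx)
{
	int old = atomic_load(&head);

	do {
		next_idx[idx] = old;
	} while (!atomic_compare_exchange_weak(&head, &old, idx));
}

int main(void)
{
	int i, node;

	for (i = 0; i < NR_NODES; i++)	/* chain the initial free list */
		next_idx[i] = i + 1;
	next_idx[NR_NODES - 1] = -1;
	atomic_init(&head, 0);

	node = pop();
	printf("allocated node %d\n", node);
	push(node);
	return 0;
}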
- * In order to control memory footprint, we require count < INT_MAX - */ - count = length; - rem = do_div(count, blk_size); - if (!base || !count || count > INT_MAX) - return -EINVAL; - - a = kzalloc(sizeof(struct gk20a_lockless_allocator), GFP_KERNEL); - if (!a) - return -ENOMEM; - - err = __gk20a_alloc_common_init(__a, name, a, false, &pool_ops); - if (err) - goto fail; - - a->next = vzalloc(sizeof(*a->next) * count); - if (!a->next) { - err = -ENOMEM; - goto fail; - } - - /* chain the elements together to form the initial free list */ - nr_nodes = (int)count; - for (i = 0; i < nr_nodes; i++) - a->next[i] = i + 1; - a->next[nr_nodes - 1] = -1; - - a->base = base; - a->length = length; - a->blk_size = blk_size; - a->nr_nodes = nr_nodes; - a->flags = flags; - atomic_set(&a->nr_allocs, 0); - - wmb(); - a->inited = true; - - gk20a_init_alloc_debug(g, __a); - alloc_dbg(__a, "New allocator: type lockless\n"); - alloc_dbg(__a, " base 0x%llx\n", a->base); - alloc_dbg(__a, " nodes %d\n", a->nr_nodes); - alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); - alloc_dbg(__a, " flags 0x%llx\n", a->flags); - - return 0; - -fail: - kfree(a); - return err; -} diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c deleted file mode 100644 index 9717a726..00000000 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c +++ /dev/null @@ -1,936 +0,0 @@ -/* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include - -#include "gk20a_allocator.h" -#include "buddy_allocator_priv.h" -#include "page_allocator_priv.h" - -#define palloc_dbg(a, fmt, arg...) \ - alloc_dbg(palloc_owner(a), fmt, ##arg) - -static struct kmem_cache *page_alloc_cache; -static struct kmem_cache *page_alloc_chunk_cache; -static struct kmem_cache *page_alloc_slab_page_cache; -static DEFINE_MUTEX(meta_data_cache_lock); - -/* - * Handle the book-keeping for these operations. 
- */ -static inline void add_slab_page_to_empty(struct page_alloc_slab *slab, - struct page_alloc_slab_page *page) -{ - BUG_ON(page->state != SP_NONE); - list_add(&page->list_entry, &slab->empty); - slab->nr_empty++; - page->state = SP_EMPTY; -} -static inline void add_slab_page_to_partial(struct page_alloc_slab *slab, - struct page_alloc_slab_page *page) -{ - BUG_ON(page->state != SP_NONE); - list_add(&page->list_entry, &slab->partial); - slab->nr_partial++; - page->state = SP_PARTIAL; -} -static inline void add_slab_page_to_full(struct page_alloc_slab *slab, - struct page_alloc_slab_page *page) -{ - BUG_ON(page->state != SP_NONE); - list_add(&page->list_entry, &slab->full); - slab->nr_full++; - page->state = SP_FULL; -} - -static inline void del_slab_page_from_empty(struct page_alloc_slab *slab, - struct page_alloc_slab_page *page) -{ - list_del_init(&page->list_entry); - slab->nr_empty--; - page->state = SP_NONE; -} -static inline void del_slab_page_from_partial(struct page_alloc_slab *slab, - struct page_alloc_slab_page *page) -{ - list_del_init(&page->list_entry); - slab->nr_partial--; - page->state = SP_NONE; -} -static inline void del_slab_page_from_full(struct page_alloc_slab *slab, - struct page_alloc_slab_page *page) -{ - list_del_init(&page->list_entry); - slab->nr_full--; - page->state = SP_NONE; -} - -static u64 gk20a_page_alloc_length(struct gk20a_allocator *a) -{ - struct gk20a_page_allocator *va = a->priv; - - return gk20a_alloc_length(&va->source_allocator); -} - -static u64 gk20a_page_alloc_base(struct gk20a_allocator *a) -{ - struct gk20a_page_allocator *va = a->priv; - - return gk20a_alloc_base(&va->source_allocator); -} - -static int gk20a_page_alloc_inited(struct gk20a_allocator *a) -{ - struct gk20a_page_allocator *va = a->priv; - - return gk20a_alloc_initialized(&va->source_allocator); -} - -static u64 gk20a_page_alloc_end(struct gk20a_allocator *a) -{ - struct gk20a_page_allocator *va = a->priv; - - return gk20a_alloc_end(&va->source_allocator); -} - -static u64 gk20a_page_alloc_space(struct gk20a_allocator *a) -{ - struct gk20a_page_allocator *va = a->priv; - - return gk20a_alloc_space(&va->source_allocator); -} - -static int gk20a_page_reserve_co(struct gk20a_allocator *a, - struct gk20a_alloc_carveout *co) -{ - struct gk20a_page_allocator *va = a->priv; - - return gk20a_alloc_reserve_carveout(&va->source_allocator, co); -} - -static void gk20a_page_release_co(struct gk20a_allocator *a, - struct gk20a_alloc_carveout *co) -{ - struct gk20a_page_allocator *va = a->priv; - - gk20a_alloc_release_carveout(&va->source_allocator, co); -} - -static void __gk20a_free_pages(struct gk20a_page_allocator *a, - struct gk20a_page_alloc *alloc, - bool free_buddy_alloc) -{ - struct page_alloc_chunk *chunk; - - while (!list_empty(&alloc->alloc_chunks)) { - chunk = list_first_entry(&alloc->alloc_chunks, - struct page_alloc_chunk, - list_entry); - list_del(&chunk->list_entry); - - if (free_buddy_alloc) - gk20a_free(&a->source_allocator, chunk->base); - kfree(chunk); - } - - kfree(alloc); -} - -static int __insert_page_alloc(struct gk20a_page_allocator *a, - struct gk20a_page_alloc *alloc) -{ - struct rb_node **new = &a->allocs.rb_node; - struct rb_node *parent = NULL; - - while (*new) { - struct gk20a_page_alloc *tmp = - container_of(*new, struct gk20a_page_alloc, - tree_entry); - - parent = *new; - if (alloc->base < tmp->base) { - new = &((*new)->rb_left); - } else if (alloc->base > tmp->base) { - new = &((*new)->rb_right); - } else { - WARN(1, "Duplicate entries in allocated 
list!\n"); - return 0; - } - } - - rb_link_node(&alloc->tree_entry, parent, new); - rb_insert_color(&alloc->tree_entry, &a->allocs); - - return 0; -} - -static struct gk20a_page_alloc *__find_page_alloc( - struct gk20a_page_allocator *a, - u64 addr) -{ - struct rb_node *node = a->allocs.rb_node; - struct gk20a_page_alloc *alloc; - - while (node) { - alloc = container_of(node, struct gk20a_page_alloc, tree_entry); - - if (addr < alloc->base) - node = node->rb_left; - else if (addr > alloc->base) - node = node->rb_right; - else - break; - } - - if (!node) - return NULL; - - rb_erase(node, &a->allocs); - - return alloc; -} - -static struct page_alloc_slab_page *alloc_slab_page( - struct gk20a_page_allocator *a, - struct page_alloc_slab *slab) -{ - struct page_alloc_slab_page *slab_page; - - slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL); - if (!slab_page) { - palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n"); - return ERR_PTR(-ENOMEM); - } - - memset(slab_page, 0, sizeof(*slab_page)); - - slab_page->page_addr = gk20a_alloc(&a->source_allocator, a->page_size); - if (!slab_page->page_addr) { - kfree(slab_page); - palloc_dbg(a, "OOM: vidmem is full!\n"); - return ERR_PTR(-ENOMEM); - } - - INIT_LIST_HEAD(&slab_page->list_entry); - slab_page->slab_size = slab->slab_size; - slab_page->nr_objects = (u32)a->page_size / slab->slab_size; - slab_page->nr_objects_alloced = 0; - slab_page->owner = slab; - slab_page->state = SP_NONE; - - a->pages_alloced++; - - palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n", - slab_page->page_addr, slab_page->slab_size); - - return slab_page; -} - -static void free_slab_page(struct gk20a_page_allocator *a, - struct page_alloc_slab_page *slab_page) -{ - palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr); - - BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) || - slab_page->nr_objects_alloced != 0 || - slab_page->bitmap != 0); - - gk20a_free(&a->source_allocator, slab_page->page_addr); - a->pages_freed++; - - kmem_cache_free(page_alloc_slab_page_cache, slab_page); -} - -/* - * This expects @alloc to have 1 empty page_alloc_chunk already added to the - * alloc_chunks list. - */ -static int __do_slab_alloc(struct gk20a_page_allocator *a, - struct page_alloc_slab *slab, - struct gk20a_page_alloc *alloc) -{ - struct page_alloc_slab_page *slab_page = NULL; - struct page_alloc_chunk *chunk; - unsigned long offs; - - /* - * Check the partial and empty lists to see if we have some space - * readily available. Take the slab_page out of what ever list it - * was in since it may be put back into a different list later. - */ - if (!list_empty(&slab->partial)) { - slab_page = list_first_entry(&slab->partial, - struct page_alloc_slab_page, - list_entry); - del_slab_page_from_partial(slab, slab_page); - } else if (!list_empty(&slab->empty)) { - slab_page = list_first_entry(&slab->empty, - struct page_alloc_slab_page, - list_entry); - del_slab_page_from_empty(slab, slab_page); - } - - if (!slab_page) { - slab_page = alloc_slab_page(a, slab); - if (IS_ERR(slab_page)) - return PTR_ERR(slab_page); - } - - /* - * We now have a slab_page. Do the alloc. - */ - offs = bitmap_find_next_zero_area(&slab_page->bitmap, - slab_page->nr_objects, - 0, 1, 0); - if (offs >= slab_page->nr_objects) { - WARN(1, "Empty/partial slab with no free objects?"); - - /* Add the buggy page to the full list... This isn't ideal. 
*/ - add_slab_page_to_full(slab, slab_page); - return -ENOMEM; - } - - bitmap_set(&slab_page->bitmap, offs, 1); - slab_page->nr_objects_alloced++; - - if (slab_page->nr_objects_alloced < slab_page->nr_objects) - add_slab_page_to_partial(slab, slab_page); - else if (slab_page->nr_objects_alloced == slab_page->nr_objects) - add_slab_page_to_full(slab, slab_page); - else - BUG(); /* Should be impossible to hit this. */ - - /* - * Handle building the gk20a_page_alloc struct. We expect one - * page_alloc_chunk to be present. - */ - alloc->slab_page = slab_page; - alloc->nr_chunks = 1; - alloc->length = slab_page->slab_size; - alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); - - chunk = list_first_entry(&alloc->alloc_chunks, - struct page_alloc_chunk, list_entry); - chunk->base = alloc->base; - chunk->length = alloc->length; - - return 0; -} - -/* - * Allocate from a slab instead of directly from the page allocator. - */ -static struct gk20a_page_alloc *__gk20a_alloc_slab( - struct gk20a_page_allocator *a, u64 len) -{ - int err, slab_nr; - struct page_alloc_slab *slab; - struct gk20a_page_alloc *alloc = NULL; - struct page_alloc_chunk *chunk = NULL; - - /* - * Align the length to a page and then divide by the page size (4k for - * this code). ilog2() of that then gets us the correct slab to use. - */ - slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12); - slab = &a->slabs[slab_nr]; - - alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); - if (!alloc) { - palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); - goto fail; - } - chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); - if (!chunk) { - palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n"); - goto fail; - } - - INIT_LIST_HEAD(&alloc->alloc_chunks); - list_add(&chunk->list_entry, &alloc->alloc_chunks); - - err = __do_slab_alloc(a, slab, alloc); - if (err) - goto fail; - - palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n", - len, slab_nr, alloc->base); - a->nr_slab_allocs++; - - return alloc; - -fail: - kfree(alloc); - kfree(chunk); - return NULL; -} - -static void __gk20a_free_slab(struct gk20a_page_allocator *a, - struct gk20a_page_alloc *alloc) -{ - struct page_alloc_slab_page *slab_page = alloc->slab_page; - struct page_alloc_slab *slab = slab_page->owner; - enum slab_page_state new_state; - int offs; - - offs = (u32)(alloc->base - slab_page->page_addr) / slab_page->slab_size; - bitmap_clear(&slab_page->bitmap, offs, 1); - - slab_page->nr_objects_alloced--; - - if (slab_page->nr_objects_alloced == 0) - new_state = SP_EMPTY; - else - new_state = SP_PARTIAL; - - /* - * Need to migrate the page to a different list. - */ - if (new_state != slab_page->state) { - /* Delete - can't be in empty. */ - if (slab_page->state == SP_PARTIAL) - del_slab_page_from_partial(slab, slab_page); - else - del_slab_page_from_full(slab, slab_page); - - /* And add. */ - if (new_state == SP_EMPTY) { - if (list_empty(&slab->empty)) - add_slab_page_to_empty(slab, slab_page); - else - free_slab_page(a, slab_page); - } else { - add_slab_page_to_partial(slab, slab_page); - } - } - - /* - * Now handle the page_alloc. - */ - __gk20a_free_pages(a, alloc, false); - a->nr_slab_frees++; - - return; -} - -/* - * Allocate physical pages. Since the underlying allocator is a buddy allocator - * the returned pages are always contiguous. However, since there could be - * fragmentation in the space this allocator will collate smaller non-contiguous - * allocations together if necessary. 
- */ -static struct gk20a_page_alloc *__do_gk20a_alloc_pages( - struct gk20a_page_allocator *a, u64 pages) -{ - struct gk20a_page_alloc *alloc; - struct page_alloc_chunk *c; - u64 max_chunk_len = pages << a->page_shift; - int i = 0; - - alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); - if (!alloc) - goto fail; - - memset(alloc, 0, sizeof(*alloc)); - - INIT_LIST_HEAD(&alloc->alloc_chunks); - alloc->length = pages << a->page_shift; - - while (pages) { - u64 chunk_addr = 0; - u64 chunk_pages = (u64)1 << __fls(pages); - u64 chunk_len = chunk_pages << a->page_shift; - - /* - * Take care of the possibility that the allocation must be - * contiguous. If this is not the first iteration then that - * means the first iteration failed to alloc the entire - * requested size. The buddy allocator guarantees any given - * single alloc is contiguous. - */ - if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0) - goto fail_cleanup; - - if (chunk_len > max_chunk_len) - chunk_len = max_chunk_len; - - /* - * Keep attempting to allocate in smaller chunks until the alloc - * either succeeds or is smaller than the page_size of the - * allocator (i.e the allocator is OOM). - */ - do { - chunk_addr = gk20a_alloc(&a->source_allocator, - chunk_len); - - /* Divide by 2 and try again */ - if (!chunk_addr) { - palloc_dbg(a, "balloc failed: 0x%llx\n", - chunk_len); - chunk_len >>= 1; - max_chunk_len = chunk_len; - } - } while (!chunk_addr && chunk_len >= a->page_size); - - chunk_pages = chunk_len >> a->page_shift; - - if (!chunk_addr) { - palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len); - goto fail_cleanup; - } - - c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); - if (!c) { - gk20a_free(&a->source_allocator, chunk_addr); - goto fail_cleanup; - } - - pages -= chunk_pages; - - c->base = chunk_addr; - c->length = chunk_len; - list_add(&c->list_entry, &alloc->alloc_chunks); - - i++; - } - - alloc->nr_chunks = i; - c = list_first_entry(&alloc->alloc_chunks, - struct page_alloc_chunk, list_entry); - alloc->base = c->base; - - return alloc; - -fail_cleanup: - while (!list_empty(&alloc->alloc_chunks)) { - c = list_first_entry(&alloc->alloc_chunks, - struct page_alloc_chunk, list_entry); - list_del(&c->list_entry); - gk20a_free(&a->source_allocator, c->base); - kfree(c); - } - kfree(alloc); -fail: - return ERR_PTR(-ENOMEM); -} - -static struct gk20a_page_alloc *__gk20a_alloc_pages( - struct gk20a_page_allocator *a, u64 len) -{ - struct gk20a_page_alloc *alloc = NULL; - struct page_alloc_chunk *c; - u64 pages; - int i = 0; - - pages = ALIGN(len, a->page_size) >> a->page_shift; - - alloc = __do_gk20a_alloc_pages(a, pages); - if (IS_ERR(alloc)) { - palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n", - pages << a->page_shift, pages); - return NULL; - } - - palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", - pages << a->page_shift, pages, alloc->base); - list_for_each_entry(c, &alloc->alloc_chunks, list_entry) { - palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", - i++, c->base, c->length); - } - - return alloc; -} - -/* - * Allocate enough pages to satisfy @len. Page size is determined at - * initialization of the allocator. - * - * The return is actually a pointer to a struct gk20a_page_alloc pointer. This - * is because it doesn't make a lot of sense to return the address of the first - * page in the list of pages (since they could be discontiguous). This has - * precedent in the dma_alloc APIs, though, it's really just an annoying - * artifact of the fact that the gk20a_alloc() API requires a u64 return type. 
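A quick aside on the return convention described just above: when the allocator is not in GPU_ALLOC_NO_SCATTER_GATHER mode, the u64 that the page allocator hands back is really a pointer to the tracking struct, cast through uintptr_t. The standalone snippet below shows only that cast pattern; it is an illustration, not part of the patch, and struct fake_alloc is invented for the example.

#include <stdint.h>
#include <stdio.h>

struct fake_alloc {
        uint64_t base;
        uint64_t length;
};

int main(void)
{
        struct fake_alloc a = { .base = 0x100000, .length = 0x10000 };

        /* The "address" returned to the caller is actually a pointer. */
        uint64_t handle = (uint64_t)(uintptr_t)&a;

        /* The free path casts it back to reach the chunk list and length. */
        struct fake_alloc *p = (struct fake_alloc *)(uintptr_t)handle;

        printf("base 0x%llx length 0x%llx\n",
               (unsigned long long)p->base, (unsigned long long)p->length);
        return 0;
}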
- */ -static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len) -{ - struct gk20a_page_allocator *a = page_allocator(__a); - struct gk20a_page_alloc *alloc = NULL; - u64 real_len; - - /* - * If we want contig pages we have to round up to a power of two. It's - * easier to do that here than in the buddy allocator. - */ - real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ? - roundup_pow_of_two(len) : len; - - alloc_lock(__a); - if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES && - real_len <= (a->page_size / 2)) - alloc = __gk20a_alloc_slab(a, real_len); - else - alloc = __gk20a_alloc_pages(a, real_len); - - if (!alloc) { - alloc_unlock(__a); - return 0; - } - - __insert_page_alloc(a, alloc); - - a->nr_allocs++; - if (real_len > a->page_size / 2) - a->pages_alloced += alloc->length >> a->page_shift; - alloc_unlock(__a); - - if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) - return alloc->base; - else - return (u64) (uintptr_t) alloc; -} - -/* - * Note: this will remove the gk20a_page_alloc struct from the RB tree - * if it's found. - */ -static void gk20a_page_free(struct gk20a_allocator *__a, u64 base) -{ - struct gk20a_page_allocator *a = page_allocator(__a); - struct gk20a_page_alloc *alloc; - - alloc_lock(__a); - - if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) - alloc = __find_page_alloc(a, base); - else - alloc = __find_page_alloc(a, - ((struct gk20a_page_alloc *)(uintptr_t)base)->base); - - if (!alloc) { - palloc_dbg(a, "Hrm, found no alloc?\n"); - goto done; - } - - a->nr_frees++; - - palloc_dbg(a, "Free 0x%llx id=0x%010llx\n", - alloc->length, alloc->base); - - /* - * Frees *alloc. - */ - if (alloc->slab_page) { - __gk20a_free_slab(a, alloc); - } else { - a->pages_freed += (alloc->length >> a->page_shift); - __gk20a_free_pages(a, alloc, true); - } - -done: - alloc_unlock(__a); -} - -static struct gk20a_page_alloc *__gk20a_alloc_pages_fixed( - struct gk20a_page_allocator *a, u64 base, u64 length) -{ - struct gk20a_page_alloc *alloc; - struct page_alloc_chunk *c; - - alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); - c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); - if (!alloc || !c) - goto fail; - - alloc->base = gk20a_alloc_fixed(&a->source_allocator, base, length); - if (!alloc->base) { - WARN(1, "gk20a: failed to fixed alloc pages @ 0x%010llx", base); - goto fail; - } - - alloc->nr_chunks = 1; - alloc->length = length; - INIT_LIST_HEAD(&alloc->alloc_chunks); - - c->base = alloc->base; - c->length = length; - list_add(&c->list_entry, &alloc->alloc_chunks); - - return alloc; - -fail: - kfree(c); - kfree(alloc); - return ERR_PTR(-ENOMEM); -} - -static u64 gk20a_page_alloc_fixed(struct gk20a_allocator *__a, - u64 base, u64 len) -{ - struct gk20a_page_allocator *a = page_allocator(__a); - struct gk20a_page_alloc *alloc = NULL; - struct page_alloc_chunk *c; - u64 aligned_len, pages; - int i = 0; - - aligned_len = ALIGN(len, a->page_size); - pages = aligned_len >> a->page_shift; - - alloc_lock(__a); - - alloc = __gk20a_alloc_pages_fixed(a, base, aligned_len); - if (IS_ERR(alloc)) { - alloc_unlock(__a); - return 0; - } - - __insert_page_alloc(a, alloc); - alloc_unlock(__a); - - palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", - alloc->base, aligned_len, pages); - list_for_each_entry(c, &alloc->alloc_chunks, list_entry) { - palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", - i++, c->base, c->length); - } - - a->nr_fixed_allocs++; - a->pages_alloced += pages; - - if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) - return alloc->base; - else - return (u64) (uintptr_t) alloc; -} - 
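The page-allocation path of this deleted file (__do_gk20a_alloc_pages() above) peels a request off in power-of-two chunks and halves the chunk size whenever the source buddy allocator refuses it, giving up once the chunk would drop below one page. The standalone program below replays that strategy with plain integers as an illustration; it is not part of the patch, and backing_alloc() and fls_ul() are invented stand-ins for the real source allocator and the kernel's __fls().

#include <stdio.h>

/* Pretend backing allocator: refuses chunks larger than `limit` pages. */
static int backing_alloc(unsigned long chunk_pages, unsigned long limit)
{
        return chunk_pages <= limit;
}

/* Index of the highest set bit, like the kernel's __fls(). */
static unsigned long fls_ul(unsigned long v)
{
        unsigned long bit = 0;

        while (v >>= 1)
                bit++;
        return bit;
}

int main(void)
{
        unsigned long pages = 13;   /* request: 13 pages                 */
        unsigned long limit = 4;    /* largest contiguous run available  */

        while (pages) {
                unsigned long chunk = 1UL << fls_ul(pages);

                while (chunk && !backing_alloc(chunk, limit))
                        chunk >>= 1;    /* halve and retry, like the kernel loop */
                if (!chunk) {
                        printf("out of memory\n");
                        return 1;
                }
                printf("chunk of %lu pages\n", chunk);
                pages -= chunk;
        }
        return 0;
}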
-static void gk20a_page_free_fixed(struct gk20a_allocator *__a, - u64 base, u64 len) -{ - struct gk20a_page_allocator *a = page_allocator(__a); - struct gk20a_page_alloc *alloc; - - alloc_lock(__a); - - if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) { - alloc = __find_page_alloc(a, base); - if (!alloc) - goto done; - } else { - alloc = (struct gk20a_page_alloc *) (uintptr_t) base; - } - - palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n", - alloc->base, alloc->length); - - a->nr_fixed_frees++; - a->pages_freed += (alloc->length >> a->page_shift); - - /* - * This works for the time being since the buddy allocator - * uses the same free function for both fixed and regular - * allocs. This would have to be updated if the underlying - * allocator were to change. - */ - __gk20a_free_pages(a, alloc, true); - -done: - alloc_unlock(__a); -} - -static void gk20a_page_allocator_destroy(struct gk20a_allocator *__a) -{ - struct gk20a_page_allocator *a = page_allocator(__a); - - alloc_lock(__a); - kfree(a); - __a->priv = NULL; - alloc_unlock(__a); -} - -static void gk20a_page_print_stats(struct gk20a_allocator *__a, - struct seq_file *s, int lock) -{ - struct gk20a_page_allocator *a = page_allocator(__a); - int i; - - if (lock) - alloc_lock(__a); - - __alloc_pstat(s, __a, "Page allocator:\n"); - __alloc_pstat(s, __a, " allocs %lld\n", a->nr_allocs); - __alloc_pstat(s, __a, " frees %lld\n", a->nr_frees); - __alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs); - __alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees); - __alloc_pstat(s, __a, " slab_allocs %lld\n", a->nr_slab_allocs); - __alloc_pstat(s, __a, " slab_frees %lld\n", a->nr_slab_frees); - __alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced); - __alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed); - __alloc_pstat(s, __a, "\n"); - - /* - * Slab info. - */ - if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) { - __alloc_pstat(s, __a, "Slabs:\n"); - __alloc_pstat(s, __a, " size empty partial full\n"); - __alloc_pstat(s, __a, " ---- ----- ------- ----\n"); - - for (i = 0; i < a->nr_slabs; i++) { - struct page_alloc_slab *slab = &a->slabs[i]; - - __alloc_pstat(s, __a, " %-9u %-9d %-9u %u\n", - slab->slab_size, - slab->nr_empty, slab->nr_partial, - slab->nr_full); - } - __alloc_pstat(s, __a, "\n"); - } - - __alloc_pstat(s, __a, "Source alloc: %s\n", - a->source_allocator.name); - gk20a_alloc_print_stats(&a->source_allocator, s, lock); - - if (lock) - alloc_unlock(__a); -} - -static const struct gk20a_allocator_ops page_ops = { - .alloc = gk20a_page_alloc, - .free = gk20a_page_free, - - .alloc_fixed = gk20a_page_alloc_fixed, - .free_fixed = gk20a_page_free_fixed, - - .reserve_carveout = gk20a_page_reserve_co, - .release_carveout = gk20a_page_release_co, - - .base = gk20a_page_alloc_base, - .length = gk20a_page_alloc_length, - .end = gk20a_page_alloc_end, - .inited = gk20a_page_alloc_inited, - .space = gk20a_page_alloc_space, - - .fini = gk20a_page_allocator_destroy, - - .print_stats = gk20a_page_print_stats, -}; - -/* - * nr_slabs is computed as follows: divide page_size by 4096 to get number of - * 4k pages in page_size. Then take the base 2 log of that to get number of - * slabs. For 64k page_size that works on like: - * - * 1024*64 / 1024*4 = 16 - * ilog2(16) = 4 - * - * That gives buckets of 1, 2, 4, and 8 pages (i.e 4k, 8k, 16k, 32k). 
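The worked arithmetic in the comment above is easy to sanity-check in isolation. The short standalone program below prints the buckets for a 64K GPU page and shows which bucket a request maps to; it is an illustration only, not part of the patch, ilog2_u() is an invented stand-in for the kernel's ilog2(), and the bucket-selection line mirrors the slab_nr computation in __gk20a_alloc_slab() earlier in this file.

#include <stdio.h>

static unsigned int ilog2_u(unsigned long v)
{
        unsigned int bit = 0;

        while (v >>= 1)
                bit++;
        return bit;
}

int main(void)
{
        unsigned long page_size = 64 * 1024;
        unsigned long len = 5000;               /* example request */
        unsigned int nr_slabs = ilog2_u(page_size >> 12);
        unsigned int i, slab_nr;

        for (i = 0; i < nr_slabs; i++)
                printf("slab %u: %lu bytes\n", i, 4096UL << i);

        /* Round len up to 4k, divide by 4k, ilog2 of that picks the bucket. */
        slab_nr = ilog2_u(((len + 4095) & ~4095UL) >> 12);
        printf("len %lu -> slab %u (%lu bytes)\n",
               len, slab_nr, 4096UL << slab_nr);
        return 0;
}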
- */ -static int gk20a_page_alloc_init_slabs(struct gk20a_page_allocator *a) -{ - size_t nr_slabs = ilog2(a->page_size >> 12); - unsigned int i; - - a->slabs = kcalloc(nr_slabs, - sizeof(struct page_alloc_slab), - GFP_KERNEL); - if (!a->slabs) - return -ENOMEM; - a->nr_slabs = nr_slabs; - - for (i = 0; i < nr_slabs; i++) { - struct page_alloc_slab *slab = &a->slabs[i]; - - slab->slab_size = SZ_4K * (1 << i); - INIT_LIST_HEAD(&slab->empty); - INIT_LIST_HEAD(&slab->partial); - INIT_LIST_HEAD(&slab->full); - slab->nr_empty = 0; - slab->nr_partial = 0; - slab->nr_full = 0; - } - - return 0; -} - -int gk20a_page_allocator_init(struct gk20a *g, struct gk20a_allocator *__a, - const char *name, u64 base, u64 length, - u64 blk_size, u64 flags) -{ - struct gk20a_page_allocator *a; - char buddy_name[sizeof(__a->name)]; - int err; - - mutex_lock(&meta_data_cache_lock); - if (!page_alloc_cache) - page_alloc_cache = KMEM_CACHE(gk20a_page_alloc, 0); - if (!page_alloc_chunk_cache) - page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0); - if (!page_alloc_slab_page_cache) - page_alloc_slab_page_cache = - KMEM_CACHE(page_alloc_slab_page, 0); - mutex_unlock(&meta_data_cache_lock); - - if (!page_alloc_cache || !page_alloc_chunk_cache) - return -ENOMEM; - - if (blk_size < SZ_4K) - return -EINVAL; - - a = kzalloc(sizeof(struct gk20a_page_allocator), GFP_KERNEL); - if (!a) - return -ENOMEM; - - err = __gk20a_alloc_common_init(__a, name, a, false, &page_ops); - if (err) - goto fail; - - a->base = base; - a->length = length; - a->page_size = blk_size; - a->page_shift = __ffs(blk_size); - a->allocs = RB_ROOT; - a->owner = __a; - a->flags = flags; - - if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) { - err = gk20a_page_alloc_init_slabs(a); - if (err) - goto fail; - } - - snprintf(buddy_name, sizeof(buddy_name), "%s-src", name); - - err = gk20a_buddy_allocator_init(g, &a->source_allocator, buddy_name, - base, length, blk_size, 0); - if (err) - goto fail; - - gk20a_init_alloc_debug(g, __a); - palloc_dbg(a, "New allocator: type page\n"); - palloc_dbg(a, " base 0x%llx\n", a->base); - palloc_dbg(a, " size 0x%llx\n", a->length); - palloc_dbg(a, " page_size 0x%llx\n", a->page_size); - palloc_dbg(a, " flags 0x%llx\n", a->flags); - palloc_dbg(a, " slabs: %d\n", a->nr_slabs); - - return 0; - -fail: - kfree(a); - return err; -} diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 39562ec1..2ee2dd43 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -3400,7 +3400,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) gr->ctx_vars.local_golden_image = NULL; if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) - nvgpu_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); + nvgpu_kfree(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; gk20a_comptag_allocator_destroy(&gr->comp_tags); @@ -7998,7 +7998,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2; map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); - map = nvgpu_alloc(map_size, true); + map = nvgpu_kalloc(map_size, true); if (!map) return -ENOMEM; @@ -8088,7 +8088,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) return 0; cleanup: gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map"); - nvgpu_free(map); + nvgpu_kfree(map); return -EINVAL; } diff --git a/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h 
b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h deleted file mode 100644 index f9b03e0e..00000000 --- a/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -/* - * Basics: - * - * - Lockless memory allocator for fixed-size structures, whose - * size is defined up front at init time. - * - Memory footprint scales linearly w/ the number of structures in - * the pool. It is ~= sizeof(int) * N. - * - Memory is pre-allocated by the client. The allocator itself - * only computes the addresses for allocations. - * - Limit of MAX_INT nodes that the allocator can be responsible for. - * - * Implementation details: - * - * The allocator maintains a single list of free nodes. We allocate & - * free nodes from the head of the list. We rely on the cmpxchg() operator - * to maintain atomicity on the head. - * - * So, both allocs & frees are O(1)!! - * - * -- Definitions -- - * Block Size - size of a single structure that this allocator will - * allocate. - * Node - one of the elements of size blk_size in the - * client-allocated buffer. - * Node Index - zero-based index of a node in the client-allocated - * contiguous buffer. - * - * -- Initial State -- - * We maintain the following to track the state of the free list: - * - * 1) A "head" index to track the index of the first free node in the list - * 2) A "next" array to track the index of the next free node in the list - * for every node. So next[head], will give the index to the 2nd free - * element in the list. - * - * So, to begin with, the free list consists of all node indices, and each - * position in the next array contains index N + 1: - * - * head = 0 - * next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes - * free_list = 0->1->2->3->4->-1 - * - * -- Allocations -- - * 1) Read the current head (aka acq_head) - * 2) Read next[acq_head], to get the 2nd free element (aka new_head) - * 3) cmp_xchg(&head, acq_head, new_head) - * 4) If it succeeds, compute the address of the node, based on - * base address, blk_size, & acq_head. - * - * head = 1; - * next = [1, 2, 3, 4, -1] : Example after allocating Node #0 - * free_list = 1->2->3->4->-1 - * - * head = 2; - * next = [1, 2, 3, 4, -1] : Example after allocating Node #1 - * free_list = 2->3->4->-1 - * - * -- Frees -- - * 1) Based on the address to be freed, calculate the index of the node - * being freed (cur_idx) - * 2) Read the current head (old_head) - * 3) So the freed node is going to go at the head of the list, and we - * want to put the old_head after it. 
So next[cur_idx] = old_head - * 4) cmpxchg(head, old_head, cur_idx) - * - * head = 0 - * next = [2, 2, 3, 4, -1] - * free_list = 0->2->3->4->-1 : Example after freeing Node #0 - * - * head = 1 - * next = [2, 0, 3, 4, -1] - * free_list = 1->0->2->3->4->-1 : Example after freeing Node #1 - */ - -#ifndef LOCKLESS_ALLOCATOR_PRIV_H -#define LOCKLESS_ALLOCATOR_PRIV_H - -struct gk20a_allocator; - -struct gk20a_lockless_allocator { - struct gk20a_allocator *owner; - - u64 base; /* Base address of the space. */ - u64 length; /* Length of the space. */ - u64 blk_size; /* Size of the structure being allocated */ - int nr_nodes; /* Number of nodes available for allocation */ - - int *next; /* An array holding the next indices per node */ - int head; /* Current node at the top of the stack */ - - u64 flags; - - bool inited; - - /* Statistics */ - atomic_t nr_allocs; -}; - -static inline struct gk20a_lockless_allocator *lockless_allocator( - struct gk20a_allocator *a) -{ - return (struct gk20a_lockless_allocator *)(a)->priv; -} - -#endif diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 2e338fef..d594a5a4 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -31,9 +31,9 @@ #include #include -#include - #include +#include +#include #include "gk20a.h" #include "mm_gk20a.h" @@ -74,7 +74,7 @@ is_vidmem_page_alloc(u64 addr) return !!(addr & 1ULL); } -static inline struct gk20a_page_alloc * +static inline struct nvgpu_page_alloc * get_vidmem_page_alloc(struct scatterlist *sgl) { u64 addr; @@ -86,7 +86,7 @@ get_vidmem_page_alloc(struct scatterlist *sgl) else WARN_ON(1); - return (struct gk20a_page_alloc *)(uintptr_t)addr; + return (struct nvgpu_page_alloc *)(uintptr_t)addr; } int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) @@ -176,7 +176,7 @@ typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words, static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) { - struct gk20a_page_alloc *alloc = NULL; + struct nvgpu_page_alloc *alloc = NULL; struct page_alloc_chunk *chunk = NULL; u32 byteoff, start_reg, until_end, n; @@ -797,8 +797,8 @@ void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block) static void gk20a_vidmem_destroy(struct gk20a *g) { #if defined(CONFIG_GK20A_VIDMEM) - if (gk20a_alloc_initialized(&g->mm.vidmem.allocator)) - gk20a_alloc_destroy(&g->mm.vidmem.allocator); + if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) + nvgpu_alloc_destroy(&g->mm.vidmem.allocator); #endif } @@ -928,8 +928,8 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) u64 default_page_size = SZ_64K; int err; - static struct gk20a_alloc_carveout wpr_co = - GK20A_CARVEOUT("wpr-region", 0, SZ_16M); + static struct nvgpu_alloc_carveout wpr_co = + NVGPU_CARVEOUT("wpr-region", 0, SZ_16M); if (!size) return 0; @@ -944,12 +944,12 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) * initialization requires vidmem but we want to use the CE to zero * out vidmem before allocating it... 
*/ - err = gk20a_page_allocator_init(g, &g->mm.vidmem.bootstrap_allocator, + err = nvgpu_page_allocator_init(g, &g->mm.vidmem.bootstrap_allocator, "vidmem-bootstrap", bootstrap_base, bootstrap_size, SZ_4K, 0); - err = gk20a_page_allocator_init(g, &g->mm.vidmem.allocator, + err = nvgpu_page_allocator_init(g, &g->mm.vidmem.allocator, "vidmem", base, size - base, default_page_size, @@ -961,7 +961,7 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) } /* Reserve bootstrap region in vidmem allocator */ - gk20a_alloc_reserve_carveout(&g->mm.vidmem.allocator, &wpr_co); + nvgpu_alloc_reserve_carveout(&g->mm.vidmem.allocator, &wpr_co); mm->vidmem.base = base; mm->vidmem.size = size - base; @@ -1482,7 +1482,7 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm, mutex_lock(&vm->update_gmmu_lock); - buffer_list = nvgpu_alloc(sizeof(*buffer_list) * + buffer_list = nvgpu_kalloc(sizeof(*buffer_list) * vm->num_user_mapped_buffers, true); if (!buffer_list) { mutex_unlock(&vm->update_gmmu_lock); @@ -1567,7 +1567,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm, gk20a_vm_mapping_batch_finish_locked(vm, &batch); mutex_unlock(&vm->update_gmmu_lock); - nvgpu_free(mapped_buffers); + nvgpu_kfree(mapped_buffers); } static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, @@ -1623,7 +1623,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, enum gmmu_pgsz_gk20a gmmu_pgsz_idx) { - struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx]; + struct nvgpu_allocator *vma = &vm->vma[gmmu_pgsz_idx]; u64 offset; u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; @@ -1645,7 +1645,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); - offset = gk20a_alloc(vma, size); + offset = nvgpu_alloc(vma, size); if (!offset) { gk20a_err(dev_from_vm(vm), "%s oom: sz=0x%llx", vma->name, size); @@ -1660,11 +1660,11 @@ int gk20a_vm_free_va(struct vm_gk20a *vm, u64 offset, u64 size, enum gmmu_pgsz_gk20a pgsz_idx) { - struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; + struct nvgpu_allocator *vma = &vm->vma[pgsz_idx]; gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", vma->name, offset, size); - gk20a_free(vma, offset); + nvgpu_free(vma, offset); return 0; } @@ -2302,15 +2302,15 @@ err_kfree: int gk20a_vidmem_get_space(struct gk20a *g, u64 *space) { #if defined(CONFIG_GK20A_VIDMEM) - struct gk20a_allocator *allocator = &g->mm.vidmem.allocator; + struct nvgpu_allocator *allocator = &g->mm.vidmem.allocator; gk20a_dbg_fn(""); - if (!gk20a_alloc_initialized(allocator)) + if (!nvgpu_alloc_initialized(allocator)) return -ENOSYS; mutex_lock(&g->mm.vidmem.clear_list_mutex); - *space = gk20a_alloc_space(allocator) + + *space = nvgpu_alloc_space(allocator) + atomic64_read(&g->mm.vidmem.bytes_pending); mutex_unlock(&g->mm.vidmem.clear_list_mutex); return 0; @@ -2359,7 +2359,7 @@ static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl, u64 buf_addr; if (aperture == APERTURE_VIDMEM) { - struct gk20a_page_alloc *alloc = get_vidmem_page_alloc(sgl); + struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); struct page_alloc_chunk *chunk = NULL; list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) { @@ -3068,7 +3068,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct mem_desc *mem) { struct gk20a_fence *gk20a_fence_out = NULL; struct gk20a_fence *gk20a_last_fence = NULL; - struct gk20a_page_alloc *alloc = NULL; + struct nvgpu_page_alloc *alloc = NULL; struct page_alloc_chunk *chunk = NULL; int err = 0; @@ -3134,15 +3134,15 @@ 
int gk20a_gmmu_alloc_attr_vid(struct gk20a *g, enum dma_attr attr, } #if defined(CONFIG_GK20A_VIDMEM) -static u64 __gk20a_gmmu_alloc(struct gk20a_allocator *allocator, dma_addr_t at, +static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at, size_t size) { u64 addr = 0; if (at) - addr = gk20a_alloc_fixed(allocator, at, size); + addr = nvgpu_alloc_fixed(allocator, at, size); else - addr = gk20a_alloc(allocator, size); + addr = nvgpu_alloc(allocator, size); return addr; } @@ -3154,14 +3154,14 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, #if defined(CONFIG_GK20A_VIDMEM) u64 addr; int err; - struct gk20a_allocator *vidmem_alloc = g->mm.vidmem.cleared ? + struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ? &g->mm.vidmem.allocator : &g->mm.vidmem.bootstrap_allocator; int before_pending; gk20a_dbg_fn(""); - if (!gk20a_alloc_initialized(&g->mm.vidmem.allocator)) + if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) return -ENOSYS; /* we don't support dma attributes here, except that kernel mappings @@ -3214,7 +3214,7 @@ int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, fail_kfree: kfree(mem->sgt); fail_physfree: - gk20a_free(&g->mm.vidmem.allocator, addr); + nvgpu_free(&g->mm.vidmem.allocator, addr); return err; #else return -ENOSYS; @@ -3241,7 +3241,7 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr, } } else { gk20a_memset(g, mem, 0, 0, mem->size); - gk20a_free(mem->allocator, + nvgpu_free(mem->allocator, (u64)get_vidmem_page_alloc(mem->sgt->sgl)); gk20a_free_sgtable(&mem->sgt); @@ -3276,7 +3276,7 @@ void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) u64 gk20a_mem_get_base_addr(struct gk20a *g, struct mem_desc *mem, u32 flags) { - struct gk20a_page_alloc *alloc; + struct nvgpu_page_alloc *alloc; u64 addr; if (mem->aperture == APERTURE_VIDMEM) { @@ -3317,7 +3317,7 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work) while ((mem = get_pending_mem_desc(mm)) != NULL) { gk20a_gmmu_clear_vidmem_mem(g, mem); - gk20a_free(mem->allocator, + nvgpu_free(mem->allocator, (u64)get_vidmem_page_alloc(mem->sgt->sgl)); gk20a_free_sgtable(&mem->sgt); @@ -3905,7 +3905,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; int err; struct scatterlist *sgl = NULL; - struct gk20a_page_alloc *alloc = NULL; + struct nvgpu_page_alloc *alloc = NULL; struct page_alloc_chunk *chunk = NULL; u64 length; @@ -4251,12 +4251,12 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) * * !!! TODO: cleanup. 
*/ - sema_sea->gpu_va = gk20a_alloc_fixed(&vm->vma[gmmu_page_size_kernel], + sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->vma[gmmu_page_size_kernel], vm->va_limit - mm->channel.kernel_size, 512 * PAGE_SIZE); if (!sema_sea->gpu_va) { - gk20a_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va); + nvgpu_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va); gk20a_vm_put(vm); return -ENOMEM; } @@ -4264,7 +4264,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) err = gk20a_semaphore_pool_map(vm->sema_pool, vm); if (err) { gk20a_semaphore_pool_unmap(vm->sema_pool, vm); - gk20a_free(&vm->vma[gmmu_page_size_small], + nvgpu_free(&vm->vma[gmmu_page_size_small], vm->sema_pool->gpu_va); gk20a_vm_put(vm); } @@ -4387,7 +4387,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-fixed", name); - err = __gk20a_buddy_allocator_init(g, &vm->fixed, + err = __nvgpu_buddy_allocator_init(g, &vm->fixed, vm, alloc_name, small_vma_start, g->separate_fixed_allocs, @@ -4404,7 +4404,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, if (small_vma_start < small_vma_limit) { snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); - err = __gk20a_buddy_allocator_init( + err = __nvgpu_buddy_allocator_init( g, &vm->vma[gmmu_page_size_small], vm, alloc_name, @@ -4420,7 +4420,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, if (large_vma_start < large_vma_limit) { snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); - err = __gk20a_buddy_allocator_init( + err = __nvgpu_buddy_allocator_init( g, &vm->vma[gmmu_page_size_big], vm, alloc_name, @@ -4438,7 +4438,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, /* * kernel reserved VMA is at the end of the aperture */ - err = __gk20a_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel], + err = __nvgpu_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel], vm, alloc_name, kernel_vma_start, kernel_vma_limit - kernel_vma_start, @@ -4469,10 +4469,10 @@ int gk20a_init_vm(struct mm_gk20a *mm, clean_up_big_allocator: if (large_vma_start < large_vma_limit) - gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); + nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); clean_up_small_allocator: if (small_vma_start < small_vma_limit) - gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); + nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]); clean_up_ptes: free_gmmu_pages(vm, &vm->pdb); clean_up_pdes: @@ -4547,7 +4547,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, { int err = -ENOMEM; int pgsz_idx = gmmu_page_size_small; - struct gk20a_allocator *vma; + struct nvgpu_allocator *vma; struct vm_gk20a *vm = as_share->vm; struct gk20a *g = vm->mm->g; struct vm_reserved_va_node *va_node; @@ -4579,13 +4579,13 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, vma = &vm->vma[pgsz_idx]; if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) { - if (gk20a_alloc_initialized(&vm->fixed)) + if (nvgpu_alloc_initialized(&vm->fixed)) vma = &vm->fixed; - vaddr_start = gk20a_alloc_fixed(vma, args->o_a.offset, + vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset, (u64)args->pages * (u64)args->page_size); } else { - vaddr_start = gk20a_alloc(vma, + vaddr_start = nvgpu_alloc(vma, (u64)args->pages * (u64)args->page_size); } @@ -4621,7 +4621,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, APERTURE_INVALID); if (!map_offset) { mutex_unlock(&vm->update_gmmu_lock); - gk20a_free(vma, vaddr_start); + nvgpu_free(vma, 
vaddr_start); kfree(va_node); goto clean_up; } @@ -4644,7 +4644,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, { int err = -ENOMEM; int pgsz_idx; - struct gk20a_allocator *vma; + struct nvgpu_allocator *vma; struct vm_gk20a *vm = as_share->vm; struct vm_reserved_va_node *va_node; struct gk20a *g = gk20a_from_vm(vm); @@ -4656,11 +4656,11 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? gmmu_page_size_big : gmmu_page_size_small; - if (gk20a_alloc_initialized(&vm->fixed)) + if (nvgpu_alloc_initialized(&vm->fixed)) vma = &vm->fixed; else vma = &vm->vma[pgsz_idx]; - gk20a_free(vma, args->offset); + nvgpu_free(vma, args->offset); mutex_lock(&vm->update_gmmu_lock); va_node = addr_to_reservation(vm, args->offset); @@ -4844,13 +4844,13 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, void gk20a_deinit_vm(struct vm_gk20a *vm) { - gk20a_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); - if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_big])) - gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); - if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_small])) - gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); - if (gk20a_alloc_initialized(&vm->fixed)) - gk20a_alloc_destroy(&vm->fixed); + nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); + if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big])) + nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); + if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small])) + nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]); + if (nvgpu_alloc_initialized(&vm->fixed)) + nvgpu_alloc_destroy(&vm->fixed); gk20a_vm_free_entries(vm, &vm->pdb, 0); } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index d32e121a..f58b5df5 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -27,7 +27,8 @@ #include #include #include -#include "gk20a_allocator.h" + +#include #ifdef CONFIG_ARM64 #define outer_flush_range(a, b) @@ -70,7 +71,7 @@ struct mem_desc { u64 gpu_va; bool fixed; /* vidmem only */ bool user_mem; /* vidmem only */ - struct gk20a_allocator *allocator; /* vidmem only */ + struct nvgpu_allocator *allocator; /* vidmem only */ struct list_head clear_list_entry; /* vidmem only */ bool skip_wmb; }; @@ -295,10 +296,10 @@ struct vm_gk20a { struct gk20a_mm_entry pdb; - struct gk20a_allocator vma[gmmu_nr_page_sizes]; + struct nvgpu_allocator vma[gmmu_nr_page_sizes]; /* If necessary, split fixed from non-fixed. 
*/ - struct gk20a_allocator fixed; + struct nvgpu_allocator fixed; struct rb_root mapped_buffers; @@ -421,8 +422,8 @@ struct mm_gk20a { size_t bootstrap_size; u64 bootstrap_base; - struct gk20a_allocator allocator; - struct gk20a_allocator bootstrap_allocator; + struct nvgpu_allocator allocator; + struct nvgpu_allocator bootstrap_allocator; u32 ce_ctx_id; volatile bool cleared; @@ -470,13 +471,13 @@ static inline u64 __nv_gmmu_va_small_page_limit(void) static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr) { - struct gk20a_allocator *a = &vm->vma[gmmu_page_size_big]; + struct nvgpu_allocator *a = &vm->vma[gmmu_page_size_big]; if (!vm->big_pages) return 0; - return addr >= gk20a_alloc_base(a) && - addr < gk20a_alloc_base(a) + gk20a_alloc_length(a); + return addr >= nvgpu_alloc_base(a) && + addr < nvgpu_alloc_base(a) + nvgpu_alloc_length(a); } /* @@ -825,7 +826,7 @@ void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; -static inline void *nvgpu_alloc(size_t size, bool clear) +static inline void *nvgpu_kalloc(size_t size, bool clear) { void *p; @@ -844,7 +845,7 @@ static inline void *nvgpu_alloc(size_t size, bool clear) return p; } -static inline void nvgpu_free(void *p) +static inline void nvgpu_kfree(void *p) { if (virt_addr_valid(p)) kfree(p); diff --git a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h deleted file mode 100644 index 7d7f43c2..00000000 --- a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef PAGE_ALLOCATOR_PRIV_H -#define PAGE_ALLOCATOR_PRIV_H - -#include -#include - -#include "gk20a_allocator.h" - -struct gk20a_allocator; - -/* - * This allocator implements the ability to do SLAB style allocation since the - * GPU has two page sizes available - 4k and 64k/128k. When the default - * granularity is the large page size (64k/128k) small allocations become very - * space inefficient. This is most notable in PDE and PTE blocks which are 4k - * in size. - * - * Thus we need the ability to suballocate in 64k pages. The way we do this for - * the GPU is as follows. We have several buckets for sub-64K allocations: - * - * B0 - 4k - * B1 - 8k - * B3 - 16k - * B4 - 32k - * B5 - 64k (for when large pages are 128k) - * - * When an allocation comes in for less than the large page size (from now on - * assumed to be 64k) the allocation is satisfied by one of the buckets. 
- */ -struct page_alloc_slab { - struct list_head empty; - struct list_head partial; - struct list_head full; - - int nr_empty; - int nr_partial; - int nr_full; - - u32 slab_size; -}; - -enum slab_page_state { - SP_EMPTY, - SP_PARTIAL, - SP_FULL, - SP_NONE -}; - -struct page_alloc_slab_page { - unsigned long bitmap; - u64 page_addr; - u32 slab_size; - - u32 nr_objects; - u32 nr_objects_alloced; - - enum slab_page_state state; - - struct page_alloc_slab *owner; - struct list_head list_entry; -}; - -struct page_alloc_chunk { - struct list_head list_entry; - - u64 base; - u64 length; -}; - -/* - * Struct to handle internal management of page allocation. It holds a list - * of the chunks of pages that make up the overall allocation - much like a - * scatter gather table. - */ -struct gk20a_page_alloc { - struct list_head alloc_chunks; - - int nr_chunks; - u64 length; - - /* - * Only useful for the RB tree - since the alloc may have discontiguous - * pages the base is essentially irrelevant except for the fact that it - * is guarenteed to be unique. - */ - u64 base; - - struct rb_node tree_entry; - - /* - * Set if this is a slab alloc. Points back to the slab page that owns - * this particular allocation. nr_chunks will always be 1 if this is - * set. - */ - struct page_alloc_slab_page *slab_page; -}; - -struct gk20a_page_allocator { - struct gk20a_allocator *owner; /* Owner of this allocator. */ - - /* - * Use a buddy allocator to manage the allocation of the underlying - * pages. This lets us abstract the discontiguous allocation handling - * out of the annoyingly complicated buddy allocator. - */ - struct gk20a_allocator source_allocator; - - /* - * Page params. - */ - u64 base; - u64 length; - u64 page_size; - u32 page_shift; - - struct rb_root allocs; /* Outstanding allocations. */ - - struct page_alloc_slab *slabs; - int nr_slabs; - - u64 flags; - - /* - * Stat tracking. 
- */ - u64 nr_allocs; - u64 nr_frees; - u64 nr_fixed_allocs; - u64 nr_fixed_frees; - u64 nr_slab_allocs; - u64 nr_slab_frees; - u64 pages_alloced; - u64 pages_freed; -}; - -static inline struct gk20a_page_allocator *page_allocator( - struct gk20a_allocator *a) -{ - return (struct gk20a_page_allocator *)(a)->priv; -} - -static inline struct gk20a_allocator *palloc_owner( - struct gk20a_page_allocator *a) -{ - return a->owner; -} - -#endif diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index e221be11..56ebda1a 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -2896,8 +2896,8 @@ void gk20a_remove_pmu_support(struct pmu_gk20a *pmu) { gk20a_dbg_fn(""); - if (gk20a_alloc_initialized(&pmu->dmem)) - gk20a_alloc_destroy(&pmu->dmem); + if (nvgpu_alloc_initialized(&pmu->dmem)) + nvgpu_alloc_destroy(&pmu->dmem); release_firmware(pmu->fw); } @@ -3607,7 +3607,7 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu) gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); if (!pmu->sample_buffer) - pmu->sample_buffer = gk20a_alloc(&pmu->dmem, + pmu->sample_buffer = nvgpu_alloc(&pmu->dmem, 2 * sizeof(u16)); if (!pmu->sample_buffer) { gk20a_err(dev_from_gk20a(g), @@ -3708,7 +3708,7 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu, for (i = 0; i < PMU_QUEUE_COUNT; i++) pmu_queue_init(pmu, i, init); - if (!gk20a_alloc_initialized(&pmu->dmem)) { + if (!nvgpu_alloc_initialized(&pmu->dmem)) { /* Align start and end addresses */ u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init), PMU_DMEM_ALLOC_ALIGNMENT); @@ -3716,9 +3716,9 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu, pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & ~(PMU_DMEM_ALLOC_ALIGNMENT - 1); u32 size = end - start; - gk20a_bitmap_allocator_init(g, &pmu->dmem, "gk20a_pmu_dmem", - start, size, - PMU_DMEM_ALLOC_ALIGNMENT, 0); + nvgpu_bitmap_allocator_init(g, &pmu->dmem, "gk20a_pmu_dmem", + start, size, + PMU_DMEM_ALLOC_ALIGNMENT, 0); } pmu->pmu_ready = true; @@ -3855,12 +3855,12 @@ static int pmu_response_handle(struct pmu_gk20a *pmu, seq->callback = NULL; if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_in_a_ptr(seq)) != 0) - gk20a_free(&pmu->dmem, + nvgpu_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_in_a_ptr(seq))); if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)) != 0) - gk20a_free(&pmu->dmem, + nvgpu_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_out_a_ptr(seq))); @@ -4601,7 +4601,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, (u16)max(payload->in.size, payload->out.size)); *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = - gk20a_alloc(&pmu->dmem, + nvgpu_alloc(&pmu->dmem, pv->pmu_allocation_get_dmem_size(pmu, in)); if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in))) goto clean_up; @@ -4644,7 +4644,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, if (payload->in.buf != payload->out.buf) { *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) = - gk20a_alloc(&pmu->dmem, + nvgpu_alloc(&pmu->dmem, pv->pmu_allocation_get_dmem_size(pmu, out)); if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, out))) @@ -4694,10 +4694,10 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, clean_up: gk20a_dbg_fn("fail"); if (in) - gk20a_free(&pmu->dmem, + nvgpu_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, in)); if (out) - gk20a_free(&pmu->dmem, + nvgpu_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, out)); 
pmu_seq_release(pmu, seq); diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index cf4f3b52..32e2ef54 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h @@ -709,7 +709,7 @@ struct pmu_gk20a { struct mutex pmu_copy_lock; struct mutex pmu_seq_lock; - struct gk20a_allocator dmem; + struct nvgpu_allocator dmem; u32 *ucode_image; bool pmu_ready; diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h index cf724fdb..8e09fcfc 100644 --- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h @@ -18,10 +18,11 @@ #include #include +#include + #include "gk20a.h" #include "mm_gk20a.h" #include "channel_gk20a.h" -#include "gk20a_allocator.h" #define gpu_sema_dbg(fmt, args...) \ gk20a_dbg(gpu_dbg_sema, fmt, ##args) diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h new file mode 100644 index 00000000..dee9b562 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef NVGPU_ALLOCATOR_H +#define NVGPU_ALLOCATOR_H + +#include +#include +#include + +/* #define ALLOCATOR_DEBUG */ + +struct nvgpu_allocator; +struct nvgpu_alloc_carveout; +struct vm_gk20a; +struct gk20a; + +/* + * Operations for an allocator to implement. + */ +struct nvgpu_allocator_ops { + u64 (*alloc)(struct nvgpu_allocator *allocator, u64 len); + void (*free)(struct nvgpu_allocator *allocator, u64 addr); + + /* + * Special interface to allocate a memory region with a specific + * starting address. Yikes. Note: if free() works for freeing both + * regular and fixed allocations then free_fixed() does not need to + * be implemented. This behavior exists for legacy reasons and should + * not be propagated to new allocators. + */ + u64 (*alloc_fixed)(struct nvgpu_allocator *allocator, + u64 base, u64 len); + void (*free_fixed)(struct nvgpu_allocator *allocator, + u64 base, u64 len); + + /* + * Allow allocators to reserve space for carveouts. + */ + int (*reserve_carveout)(struct nvgpu_allocator *allocator, + struct nvgpu_alloc_carveout *co); + void (*release_carveout)(struct nvgpu_allocator *allocator, + struct nvgpu_alloc_carveout *co); + + /* + * Returns info about the allocator. + */ + u64 (*base)(struct nvgpu_allocator *allocator); + u64 (*length)(struct nvgpu_allocator *allocator); + u64 (*end)(struct nvgpu_allocator *allocator); + int (*inited)(struct nvgpu_allocator *allocator); + u64 (*space)(struct nvgpu_allocator *allocator); + + /* Destructor. */ + void (*fini)(struct nvgpu_allocator *allocator); + + /* Debugging. 
*/ + void (*print_stats)(struct nvgpu_allocator *allocator, + struct seq_file *s, int lock); +}; + +struct nvgpu_allocator { + char name[32]; + struct mutex lock; + + void *priv; + const struct nvgpu_allocator_ops *ops; + + struct dentry *debugfs_entry; + bool debug; /* Control for debug msgs. */ +}; + +struct nvgpu_alloc_carveout { + const char *name; + u64 base; + u64 length; + + struct nvgpu_allocator *allocator; + + /* + * For usage by the allocator implementation. + */ + struct list_head co_entry; +}; + +#define NVGPU_CARVEOUT(__name, __base, __length) \ + { \ + .name = (__name), \ + .base = (__base), \ + .length = (__length) \ + } + +/* + * These are the available allocator flags. + * + * GPU_ALLOC_GVA_SPACE + * + * This flag makes sense for the buddy allocator only. It specifies that the + * allocator will be used for managing a GVA space. When managing GVA spaces + * special care has to be taken to ensure that allocations of similar PTE + * sizes are placed in the same PDE block. This allows the higher level + * code to skip defining both small and large PTE tables for every PDE. That + * can save considerable memory for address spaces that have a lot of + * allocations. + * + * GPU_ALLOC_NO_ALLOC_PAGE + * + * For any allocator that needs to manage a resource in a latency critical + * path this flag specifies that the allocator should not use any kmalloc() + * or similar functions during normal operation. Initialization routines + * may still use kmalloc(). This prevents the possibility of long waits for + * pages when using alloc_page(). Currently only the bitmap allocator + * implements this functionality. + * + * Also note that if you accept this flag then you must also define the + * free_fixed() function. Since no meta-data is allocated to help free + * allocations you need to keep track of the meta-data yourself (in this + * case the base and length of the allocation as opposed to just the base + * of the allocation). + * + * GPU_ALLOC_4K_VIDMEM_PAGES + * + * We manage vidmem pages at a large page granularity for performance + * reasons; however, this can lead to wasting memory. For page allocators + * setting this flag will tell the allocator to manage pools of 4K pages + * inside internally allocated large pages. + * + * Currently this flag is ignored since the only usage of the page allocator + * uses a 4K block size already. However, this flag has been reserved since + * it will be necessary in the future. + * + * GPU_ALLOC_FORCE_CONTIG + * + * Force allocations to be contiguous. Currently only relevant for page + * allocators since all other allocators are naturally contiguous. + * + * GPU_ALLOC_NO_SCATTER_GATHER + * + * The page allocator normally returns a scatter gather data structure for + * allocations (to handle discontiguous pages). However, at times that can + * be annoying so this flag forces the page allocator to return a u64 + * pointing to the allocation base (requires GPU_ALLOC_FORCE_CONTIG to be + * set as well). + */ +#define GPU_ALLOC_GVA_SPACE 0x1 +#define GPU_ALLOC_NO_ALLOC_PAGE 0x2 +#define GPU_ALLOC_4K_VIDMEM_PAGES 0x4 +#define GPU_ALLOC_FORCE_CONTIG 0x8 +#define GPU_ALLOC_NO_SCATTER_GATHER 0x10 + +static inline void alloc_lock(struct nvgpu_allocator *a) +{ + mutex_lock(&a->lock); +} + +static inline void alloc_unlock(struct nvgpu_allocator *a) +{ + mutex_unlock(&a->lock); +} + +/* + * Buddy allocator specific initializers. 
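The ops/priv pairing above is the whole backend contract: an implementation fills in an nvgpu_allocator_ops table, keeps its own state behind a->priv, and registers both through __nvgpu_alloc_common_init() (declared later in this header). Below is a compressed, hypothetical bump-allocator backend, purely to show the wiring — it is not one of the allocators added by this patch, and it assumes a non-zero base so that 0 can keep meaning "allocation failed".

#include <linux/errno.h>
#include <linux/slab.h>
#include <nvgpu/allocator.h>

/* Hypothetical backend state, kept behind nvgpu_allocator->priv. */
struct example_bump_allocator {
        u64 base;
        u64 end;
        u64 next;
};

static u64 example_bump_alloc(struct nvgpu_allocator *a, u64 len)
{
        struct example_bump_allocator *ba = a->priv;
        u64 addr = 0;

        alloc_lock(a);
        if (ba->next + len <= ba->end) {
                addr = ba->next;
                ba->next += len;
        }
        alloc_unlock(a);

        return addr;
}

static void example_bump_free(struct nvgpu_allocator *a, u64 addr)
{
        /* A bump allocator never reclaims; real backends obviously do. */
}

static void example_bump_fini(struct nvgpu_allocator *a)
{
        kfree(a->priv);
}

static const struct nvgpu_allocator_ops example_bump_ops = {
        .alloc = example_bump_alloc,
        .free  = example_bump_free,
        .fini  = example_bump_fini,
};

static int example_bump_allocator_init(struct nvgpu_allocator *a,
                                       const char *name, u64 base, u64 size)
{
        struct example_bump_allocator *ba = kzalloc(sizeof(*ba), GFP_KERNEL);

        if (!ba)
                return -ENOMEM;

        ba->base = base;
        ba->end  = base + size;
        ba->next = base;

        /* Hand the name, private state and ops table to the common helper. */
        return __nvgpu_alloc_common_init(a, name, ba, false, &example_bump_ops);
}

The bitmap, buddy, page and lockless allocators moved into common/mm/ by this patch all follow this same shape, differing only in what sits behind priv.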
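To make the carveout and flag text above concrete, here is a hedged sketch of bringing up a page allocator for a contiguous-only use case and carving a region out of it before general allocations start. All bases, sizes and the "example-vidmem" / "example-carveout" names are invented for illustration; whether a given backend actually implements reserve_carveout() is up to that backend — only the call shapes and the documented flag semantics are taken from this header.

#include <linux/errno.h>
#include <linux/sizes.h>
#include <nvgpu/allocator.h>

/* Invented carveout lying inside the managed range below. */
static struct nvgpu_alloc_carveout example_co =
        NVGPU_CARVEOUT("example-carveout", SZ_4M, SZ_1M);

static int example_vidmem_setup(struct gk20a *g, struct nvgpu_allocator *a)
{
        u64 addr;
        int err;

        /*
         * FORCE_CONTIG + NO_SCATTER_GATHER: nvgpu_alloc() hands back a plain
         * base address instead of a scatter-gather list of chunks.
         */
        err = nvgpu_page_allocator_init(g, a, "example-vidmem",
                                        SZ_4M, SZ_256M, SZ_4K,
                                        GPU_ALLOC_FORCE_CONTIG |
                                        GPU_ALLOC_NO_SCATTER_GATHER);
        if (err)
                return err;

        /* Reserve the carveout before anything else lands in that range. */
        err = nvgpu_alloc_reserve_carveout(a, &example_co);
        if (err)
                goto fail;

        addr = nvgpu_alloc(a, SZ_64K);
        if (!addr) {
                nvgpu_alloc_release_carveout(a, &example_co);
                err = -ENOMEM;
                goto fail;
        }

        nvgpu_free(a, addr);
        nvgpu_alloc_release_carveout(a, &example_co);
        return 0;

fail:
        nvgpu_alloc_destroy(a);
        return err;
}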
+ */ +int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *a, + struct vm_gk20a *vm, const char *name, + u64 base, u64 size, u64 blk_size, + u64 max_order, u64 flags); +int nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *a, + const char *name, u64 base, u64 size, + u64 blk_size, u64 flags); + +/* + * Bitmap initializers. + */ +int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *a, + const char *name, u64 base, u64 length, + u64 blk_size, u64 flags); + +/* + * Page allocator initializers. + */ +int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *a, + const char *name, u64 base, u64 length, + u64 blk_size, u64 flags); + +/* + * Lockless allocatior initializers. + * Note: This allocator can only allocate fixed-size structures of a + * pre-defined size. + */ +int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *a, + const char *name, u64 base, u64 length, + u64 struct_size, u64 flags); + +#define GPU_BALLOC_MAX_ORDER 31 + +/* + * Allocator APIs. + */ +u64 nvgpu_alloc(struct nvgpu_allocator *allocator, u64 len); +void nvgpu_free(struct nvgpu_allocator *allocator, u64 addr); + +u64 nvgpu_alloc_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len); +void nvgpu_free_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len); + +int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a, + struct nvgpu_alloc_carveout *co); +void nvgpu_alloc_release_carveout(struct nvgpu_allocator *a, + struct nvgpu_alloc_carveout *co); + +u64 nvgpu_alloc_base(struct nvgpu_allocator *a); +u64 nvgpu_alloc_length(struct nvgpu_allocator *a); +u64 nvgpu_alloc_end(struct nvgpu_allocator *a); +u64 nvgpu_alloc_initialized(struct nvgpu_allocator *a); +u64 nvgpu_alloc_space(struct nvgpu_allocator *a); + +void nvgpu_alloc_destroy(struct nvgpu_allocator *allocator); + +void nvgpu_alloc_print_stats(struct nvgpu_allocator *a, + struct seq_file *s, int lock); + +/* + * Common functionality for the internals of the allocators. + */ +void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a); +void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a); + +int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, + const char *name, void *priv, bool dbg, + const struct nvgpu_allocator_ops *ops); + +static inline void nvgpu_alloc_enable_dbg(struct nvgpu_allocator *a) +{ + a->debug = true; +} + +static inline void nvgpu_alloc_disable_dbg(struct nvgpu_allocator *a) +{ + a->debug = false; +} + +/* + * Debug stuff. + */ +extern u32 nvgpu_alloc_tracing_on; + +void nvgpu_alloc_debugfs_init(struct device *dev); + +#define nvgpu_alloc_trace_func() \ + do { \ + if (nvgpu_alloc_tracing_on) \ + trace_printk("%s\n", __func__); \ + } while (0) + +#define nvgpu_alloc_trace_func_done() \ + do { \ + if (nvgpu_alloc_tracing_on) \ + trace_printk("%s_done\n", __func__); \ + } while (0) + +#define __alloc_pstat(seq, allocator, fmt, arg...) \ + do { \ + if (s) \ + seq_printf(seq, fmt, ##arg); \ + else \ + alloc_dbg(allocator, fmt, ##arg); \ + } while (0) + +#define __alloc_dbg(a, fmt, arg...) \ + pr_info("%-25s %25s() " fmt, (a)->name, __func__, ##arg) + +#if defined(ALLOCATOR_DEBUG) +/* + * Always print the debug messages... + */ +#define alloc_dbg(a, fmt, arg...) __alloc_dbg(a, fmt, ##arg) +#else +/* + * Only print debug messages if debug is enabled for a given allocator. + */ +#define alloc_dbg(a, fmt, arg...) 
\ + do { \ + if ((a)->debug) \ + __alloc_dbg((a), fmt, ##arg); \ + } while (0) + +#endif + +#endif /* NVGPU_ALLOCATOR_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h new file mode 100644 index 00000000..7c21c117 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef PAGE_ALLOCATOR_PRIV_H +#define PAGE_ALLOCATOR_PRIV_H + +#include +#include + +#include + +struct nvgpu_allocator; + +/* + * This allocator implements the ability to do SLAB style allocation since the + * GPU has two page sizes available - 4k and 64k/128k. When the default + * granularity is the large page size (64k/128k) small allocations become very + * space inefficient. This is most notable in PDE and PTE blocks which are 4k + * in size. + * + * Thus we need the ability to suballocate in 64k pages. The way we do this for + * the GPU is as follows. We have several buckets for sub-64K allocations: + * + * B0 - 4k + * B1 - 8k + * B3 - 16k + * B4 - 32k + * B5 - 64k (for when large pages are 128k) + * + * When an allocation comes in for less than the large page size (from now on + * assumed to be 64k) the allocation is satisfied by one of the buckets. + */ +struct page_alloc_slab { + struct list_head empty; + struct list_head partial; + struct list_head full; + + int nr_empty; + int nr_partial; + int nr_full; + + u32 slab_size; +}; + +enum slab_page_state { + SP_EMPTY, + SP_PARTIAL, + SP_FULL, + SP_NONE +}; + +struct page_alloc_slab_page { + unsigned long bitmap; + u64 page_addr; + u32 slab_size; + + u32 nr_objects; + u32 nr_objects_alloced; + + enum slab_page_state state; + + struct page_alloc_slab *owner; + struct list_head list_entry; +}; + +struct page_alloc_chunk { + struct list_head list_entry; + + u64 base; + u64 length; +}; + +/* + * Struct to handle internal management of page allocation. It holds a list + * of the chunks of pages that make up the overall allocation - much like a + * scatter gather table. + */ +struct nvgpu_page_alloc { + struct list_head alloc_chunks; + + int nr_chunks; + u64 length; + + /* + * Only useful for the RB tree - since the alloc may have discontiguous + * pages the base is essentially irrelevant except for the fact that it + * is guarenteed to be unique. + */ + u64 base; + + struct rb_node tree_entry; + + /* + * Set if this is a slab alloc. Points back to the slab page that owns + * this particular allocation. nr_chunks will always be 1 if this is + * set. + */ + struct page_alloc_slab_page *slab_page; +}; + +struct nvgpu_page_allocator { + struct nvgpu_allocator *owner; /* Owner of this allocator. */ + + /* + * Use a buddy allocator to manage the allocation of the underlying + * pages. This lets us abstract the discontiguous allocation handling + * out of the annoyingly complicated buddy allocator. 
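The bucket scheme described above boils down to "round the request up to the nearest power of two, starting at 4K, and stop one step short of the large page size". A sketch of that arithmetic follows — the idea only, not the code added in page_allocator.c, and example_slab_bucket() is an invented name.

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/sizes.h>

/*
 * Bucket selection as described in the comment above: a 64K large page
 * gives buckets 0..3 (4K, 8K, 16K, 32K); a 128K large page adds a 64K
 * bucket. Illustrative helper only.
 */
static u32 example_slab_bucket(u64 len, u64 large_page_size)
{
        u64 bucket_size = max_t(u64, SZ_4K, roundup_pow_of_two(len));

        /* Requests at or above the large page size are not slab requests. */
        WARN_ON(bucket_size >= large_page_size);

        return (u32)(ilog2(bucket_size) - ilog2(SZ_4K));
}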
+ */ + struct nvgpu_allocator source_allocator; + + /* + * Page params. + */ + u64 base; + u64 length; + u64 page_size; + u32 page_shift; + + struct rb_root allocs; /* Outstanding allocations. */ + + struct page_alloc_slab *slabs; + int nr_slabs; + + u64 flags; + + /* + * Stat tracking. + */ + u64 nr_allocs; + u64 nr_frees; + u64 nr_fixed_allocs; + u64 nr_fixed_frees; + u64 nr_slab_allocs; + u64 nr_slab_frees; + u64 pages_alloced; + u64 pages_freed; +}; + +static inline struct nvgpu_page_allocator *page_allocator( + struct nvgpu_allocator *a) +{ + return (struct nvgpu_page_allocator *)(a)->priv; +} + +static inline struct nvgpu_allocator *palloc_owner( + struct nvgpu_page_allocator *a) +{ + return a->owner; +} + +#endif diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 69f6fcaf..66c9344b 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -227,11 +227,11 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm) err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); WARN_ON(err || msg.ret); - gk20a_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); - if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_small])) - gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); - if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_big])) - gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); + nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); + if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small])) + nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]); + if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big])) + nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); mutex_unlock(&vm->update_gmmu_lock); @@ -370,7 +370,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, gmmu_page_sizes[gmmu_page_size_small] >> 10); - err = __gk20a_buddy_allocator_init( + err = __nvgpu_buddy_allocator_init( g, &vm->vma[gmmu_page_size_small], vm, name, @@ -386,7 +386,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, if (large_vma_start < large_vma_limit) { snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, gmmu_page_sizes[gmmu_page_size_big] >> 10); - err = __gk20a_buddy_allocator_init( + err = __nvgpu_buddy_allocator_init( g, &vm->vma[gmmu_page_size_big], vm, name, @@ -404,7 +404,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, /* * kernel reserved VMA is at the end of the aperture */ - err = __gk20a_buddy_allocator_init( + err = __nvgpu_buddy_allocator_init( g, &vm->vma[gmmu_page_size_kernel], vm, name, @@ -428,10 +428,10 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, clean_up_big_allocator: if (large_vma_start < large_vma_limit) - gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); + nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]); clean_up_small_allocator: if (small_vma_start < small_vma_limit) - gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); + nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]); clean_up_share: msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE; msg.handle = vgpu_get_handle(g); -- cgit v1.2.2
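Finally, the VMA plumbing in mm_vgpu.c above reduces to one __nvgpu_buddy_allocator_init() call per page size plus the initialized/destroy guard on teardown. Below is a sketch of the small-page case; the 4K block size, the GPU_ALLOC_GVA_SPACE flag, the "example_as-4KB" name and the small_vma_start/small_vma_limit parameters are assumptions standing in for what vgpu_vm_alloc_share() derives from the address-space layout (in the real code the allocator passed in is &vm->vma[gmmu_page_size_small]).

#include <linux/sizes.h>
#include <nvgpu/allocator.h>

static int example_init_small_vma(struct gk20a *g, struct vm_gk20a *vm,
                                  struct nvgpu_allocator *vma,
                                  u64 small_vma_start, u64 small_vma_limit)
{
        /* GVA-space buddy allocator managing the small-page region. */
        return __nvgpu_buddy_allocator_init(g, vma, vm, "example_as-4KB",
                                            small_vma_start,
                                            small_vma_limit - small_vma_start,
                                            SZ_4K,
                                            GPU_BALLOC_MAX_ORDER,
                                            GPU_ALLOC_GVA_SPACE);
}

static void example_fini_vma(struct nvgpu_allocator *vma)
{
        /* Matches the vgpu_vm_remove_support() teardown pattern above. */
        if (nvgpu_alloc_initialized(vma))
                nvgpu_alloc_destroy(vma);
}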