author		Alex Waterman <alexw@nvidia.com>	2016-07-21 21:26:20 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-08-30 13:04:10 -0400
commit		448df6ed271a7b73a1a5e3dfbba826e745b82922 (patch)
tree		284c4c4ede880629df1c342e2d5516c462d5ed56 /drivers/gpu
parent		954258e121d2436097d5fc1abc6b7d73ddd784f6 (diff)
gpu: nvgpu: Implement a vidmem allocator

Implement an allocator suitable for managing the video memory on dGPUs.
It works by allocating chunks from an underlying buddy allocator and
collating the chunks together, much like a scatter-gather table (sgt)
does in the wider Linux kernel. This makes it possible to satisfy large
buffer requests out of potentially fragmented memory; the GMMU can then
map the physical vidmem into contiguous GVA spaces.

Jira DNVGPU-96

Change-Id: Ic1d7800b033a170b77790aa23fad6858443d0e89
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1197203
(cherry picked from commit fa44684a843956ae384fef6d7a79b9cbbd04f73e)
Reviewed-on: http://git-master/r/1185231
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
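For orientation, here is a minimal usage sketch of the API this patch adds. The
wrapper function and the sizes (vidmem_alloc_example, SZ_1G, SZ_2M) are
illustrative assumptions; only gk20a_page_allocator_init(), gk20a_alloc(),
gk20a_free() and the GPU_ALLOC_* flags come from the diff below.

    /* Illustrative only: carve a page allocator out of a 1 GB vidmem region
     * and take a 2 MB physically contiguous buffer from it. */
    static int vidmem_alloc_example(void)
    {
            struct gk20a_allocator vidmem;
            u64 addr;
            int err;

            err = gk20a_page_allocator_init(&vidmem, "vidmem-example",
                            SZ_4K, SZ_1G - SZ_4K, SZ_4K,
                            GPU_ALLOC_FORCE_CONTIG |
                            GPU_ALLOC_NO_SCATTER_GATHER);
            if (err)
                    return err;

            /* With GPU_ALLOC_NO_SCATTER_GATHER the returned u64 is the base
             * address of the allocation rather than a gk20a_page_alloc. */
            addr = gk20a_alloc(&vidmem, SZ_2M);
            if (!addr)
                    return -ENOMEM;

            gk20a_free(&vidmem, addr);
            return 0;
    }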
Diffstat (limited to 'drivers/gpu')
-rw-r--r--	drivers/gpu/nvgpu/Makefile.nvgpu		1
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a_allocator.h	32
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c	532
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c		6
-rw-r--r--	drivers/gpu/nvgpu/gk20a/page_allocator_priv.h	100
5 files changed, 668 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index e3c115c6..7e703706 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -55,6 +55,7 @@ nvgpu-y := \
 	gk20a/gk20a_allocator.o \
 	gk20a/gk20a_allocator_bitmap.o \
 	gk20a/gk20a_allocator_buddy.o \
+	gk20a/gk20a_allocator_page.o \
 	gk20a/cde_gk20a.o \
 	gk20a/platform_gk20a_generic.o \
 	gk20a/tsg_gk20a.o \
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
index f3b6dab3..9becf053 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -99,9 +99,32 @@ struct gk20a_allocator {
  * allocations you need to keep track of the meta-data yourself (in this
  * case the base and length of the allocation as opposed to just the base
  * of the allocation).
+ *
+ * GPU_ALLOC_4K_VIDMEM_PAGES
+ *
+ *   We manage vidmem pages at a large page granularity for performance
+ *   reasons; however, this can lead to wasting memory. For page allocators
+ *   setting this flag will tell the allocator to manage pools of 4K pages
+ *   inside internally allocated large pages.
+ *
+ * GPU_ALLOC_FORCE_CONTIG
+ *
+ *   Force allocations to be contiguous. Currently only relevant for page
+ *   allocators since all other allocators are naturally contiguous.
+ *
+ * GPU_ALLOC_NO_SCATTER_GATHER
+ *
+ *   The page allocator normally returns a scatter gather data structure for
+ *   allocations (to handle discontiguous pages). However, at times that can
+ *   be annoying so this flag forces the page allocator to return a u64
+ *   pointing to the allocation base (requires GPU_ALLOC_FORCE_CONTIG to be
+ *   set as well).
  */
 #define GPU_ALLOC_GVA_SPACE		0x1
 #define GPU_ALLOC_NO_ALLOC_PAGE	0x2
+#define GPU_ALLOC_4K_VIDMEM_PAGES	0x4
+#define GPU_ALLOC_FORCE_CONTIG		0x8
+#define GPU_ALLOC_NO_SCATTER_GATHER	0x10

 static inline void alloc_lock(struct gk20a_allocator *a)
 {
@@ -131,6 +154,13 @@ int gk20a_bitmap_allocator_init(struct gk20a_allocator *__a,
 			const char *name, u64 base, u64 length,
 			u64 blk_size, u64 flags);

+/*
+ * Page allocator initializers.
+ */
+int gk20a_page_allocator_init(struct gk20a_allocator *__a,
+			const char *name, u64 base, u64 length,
+			u64 blk_size, u64 flags);
+
 #define GPU_BALLOC_MAX_ORDER		31

 /*
@@ -199,7 +229,7 @@ void gk20a_alloc_debugfs_init(struct platform_device *pdev);
 	} while (0)

 #define __alloc_dbg(a, fmt, arg...)			\
-	pr_info("%-25s %25s() " fmt, (a)->name, __func__, ##arg)
+	pr_warn("%-25s %25s() " fmt, (a)->name, __func__, ##arg)

 #if defined(ALLOCATOR_DEBUG)
 /*
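As the new comment block spells out, GPU_ALLOC_NO_SCATTER_GATHER is only
meaningful together with GPU_ALLOC_FORCE_CONTIG, since a bare base address
cannot describe a discontiguous allocation. A hypothetical sanity check (not
part of this patch) would make the dependency explicit:

    /* Hypothetical helper, not in this patch: reject flag combinations the
     * page allocator cannot honour. */
    static int gpu_alloc_check_flags(u64 flags)
    {
            if ((flags & GPU_ALLOC_NO_SCATTER_GATHER) &&
                !(flags & GPU_ALLOC_FORCE_CONTIG))
                    return -EINVAL;

            return 0;
    }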
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
new file mode 100644
index 00000000..534027cc
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
@@ -0,0 +1,532 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+
+#include "gk20a_allocator.h"
+#include "buddy_allocator_priv.h"
+#include "page_allocator_priv.h"
+
+#define palloc_dbg(a, fmt, arg...)			\
+	alloc_dbg(palloc_owner(a), fmt, ##arg)
+
+static struct kmem_cache *page_alloc_cache;
+static struct kmem_cache *page_alloc_chunk_cache;
+static DEFINE_MUTEX(meta_data_cache_lock);
+
+static u64 gk20a_page_alloc_length(struct gk20a_allocator *a)
+{
+	struct gk20a_page_allocator *va = a->priv;
+
+	return gk20a_alloc_length(&va->source_allocator);
+}
+
+static u64 gk20a_page_alloc_base(struct gk20a_allocator *a)
+{
+	struct gk20a_page_allocator *va = a->priv;
+
+	return gk20a_alloc_base(&va->source_allocator);
+}
+
+static int gk20a_page_alloc_inited(struct gk20a_allocator *a)
+{
+	struct gk20a_page_allocator *va = a->priv;
+
+	return gk20a_alloc_initialized(&va->source_allocator);
+}
+
+static u64 gk20a_page_alloc_end(struct gk20a_allocator *a)
+{
+	struct gk20a_page_allocator *va = a->priv;
+
+	return gk20a_alloc_end(&va->source_allocator);
+}
+
+static int __insert_page_alloc(struct gk20a_page_allocator *a,
+			       struct gk20a_page_alloc *alloc)
+{
+	struct rb_node **new = &a->allocs.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*new) {
+		struct gk20a_page_alloc *tmp =
+			container_of(*new, struct gk20a_page_alloc,
+				     tree_entry);
+
+		parent = *new;
+		if (alloc->base < tmp->base) {
+			new = &((*new)->rb_left);
+		} else if (alloc->base > tmp->base) {
+			new = &((*new)->rb_right);
+		} else {
+			WARN(1, "Duplicate entries in allocated list!\n");
+			return 0;
+		}
+	}
+
+	rb_link_node(&alloc->tree_entry, parent, new);
+	rb_insert_color(&alloc->tree_entry, &a->allocs);
+
+	return 0;
+}
+
+static struct gk20a_page_alloc *__find_page_alloc(
+	struct gk20a_page_allocator *a,
+	u64 addr)
+{
+	struct rb_node *node = a->allocs.rb_node;
+	struct gk20a_page_alloc *alloc;
+
+	while (node) {
+		alloc = container_of(node, struct gk20a_page_alloc, tree_entry);
+
+		if (addr < alloc->base)
+			node = node->rb_left;
+		else if (addr > alloc->base)
+			node = node->rb_right;
+		else
+			break;
+	}
+
+	if (!node)
+		return NULL;
+
+	rb_erase(node, &a->allocs);
+
+	return alloc;
+}
+
+/*
+ * Allocate physical pages. Since the underlying allocator is a buddy allocator
+ * the returned pages are always contiguous. However, since there could be
+ * fragmentation in the space this allocator will collate smaller non-contiguous
+ * allocations together if necessary.
+ */
+static struct gk20a_page_alloc *__gk20a_alloc_pages(
+	struct gk20a_page_allocator *a, u64 pages)
+{
+	struct gk20a_page_alloc *alloc;
+	struct page_alloc_chunk *c;
+	u64 max_chunk_len = pages << a->page_shift;
+	int i = 0;
+
+	alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
+	if (!alloc)
+		goto fail;
+
+	INIT_LIST_HEAD(&alloc->alloc_chunks);
+	alloc->length = pages << a->page_shift;
+
+	while (pages) {
+		u64 chunk_addr = 0;
+		u64 chunk_pages = 1 << __fls(pages);
+		u64 chunk_len = chunk_pages << a->page_shift;
+
+		/*
+		 * Take care of the possibility that the allocation must be
+		 * contiguous. If this is not the first iteration then that
+		 * means the first iteration failed to alloc the entire
+		 * requested size. The buddy allocator guarantees any given
+		 * single alloc is contiguous.
+		 */
+		if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0)
+			goto fail_cleanup;
+
+		if (chunk_len > max_chunk_len)
+			chunk_len = max_chunk_len;
+
+		/*
+		 * Keep attempting to allocate in smaller chunks until the alloc
+		 * either succeeds or is smaller than the page_size of the
+		 * allocator (i.e the allocator is OOM).
+		 */
+		do {
+			chunk_addr = gk20a_alloc(&a->source_allocator,
+						 chunk_len);
+
+			/* Divide by 2 and try again */
+			if (!chunk_addr) {
+				palloc_dbg(a, "balloc failed: 0x%llx\n",
+					   chunk_len);
+				chunk_len >>= 1;
+				max_chunk_len = chunk_len;
+			}
+		} while (!chunk_addr && chunk_len >= a->page_size);
+
+		if (!chunk_addr) {
+			palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len);
+			goto fail_cleanup;
+		}
+
+		c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
+		if (!c) {
+			gk20a_free(&a->source_allocator, chunk_addr);
+			goto fail_cleanup;
+		}
+
+		pages -= chunk_pages;
+
+		c->base = chunk_addr;
+		c->length = chunk_len;
+		list_add(&c->list_entry, &alloc->alloc_chunks);
+
+		i++;
+	}
+
+	alloc->nr_chunks = i;
+	c = list_first_entry(&alloc->alloc_chunks,
+			     struct page_alloc_chunk, list_entry);
+	alloc->base = c->base;
+
+	return alloc;
+
+fail_cleanup:
+	while (!list_empty(&alloc->alloc_chunks)) {
+		c = list_first_entry(&alloc->alloc_chunks,
+				     struct page_alloc_chunk, list_entry);
+		list_del(&c->list_entry);
+		kfree(c);
+	}
+	kfree(alloc);
+fail:
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Allocate enough pages to satisfy @len. Page size is determined at
+ * initialization of the allocator.
+ *
+ * The return is actually a pointer to a struct gk20a_page_alloc pointer. This
+ * is because it doesn't make a lot of sense to return the address of the first
+ * page in the list of pages (since they could be discontiguous). This has
+ * precedent in the dma_alloc APIs, though, it's really just an annoying
+ * artifact of the fact that the gk20a_alloc() API requires a u64 return type.
+ */
+static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
+{
+	struct gk20a_page_allocator *a = page_allocator(__a);
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *c;
+	u64 real_len;
+	u64 pages;
+	int i = 0;
+
+	/*
+	 * If we want contig pages we have to round up to a power of two. It's
+	 * easier to do that here than in the buddy allocator.
+	 */
+	real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
+		roundup_pow_of_two(len) : len;
+
+	pages = ALIGN(real_len, a->page_size) >> a->page_shift;
+
+	alloc_lock(__a);
+
+	alloc = __gk20a_alloc_pages(a, pages);
+	if (IS_ERR(alloc)) {
+		alloc_unlock(__a);
+		palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
+			   pages << a->page_shift, pages);
+		return 0;
+	}
+
+	__insert_page_alloc(a, alloc);
+	alloc_unlock(__a);
+
+	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
+		   pages << a->page_shift, pages, alloc->base);
+	list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
+		palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
+			   i++, c->base, c->length);
+	}
+
+	a->nr_allocs++;
+	a->pages_alloced += pages;
+
+	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
+		return alloc->base;
+	else
+		return (u64) (uintptr_t) alloc;
+}
+
+static void __gk20a_free_pages(struct gk20a_page_allocator *a,
+			       struct gk20a_page_alloc *alloc)
+{
+	struct page_alloc_chunk *chunk;
+
+	while (!list_empty(&alloc->alloc_chunks)) {
+		chunk = list_first_entry(&alloc->alloc_chunks,
+					 struct page_alloc_chunk,
+					 list_entry);
+		list_del(&chunk->list_entry);
+
+		gk20a_free(&a->source_allocator, chunk->base);
+		kfree(chunk);
+	}
+
+	kfree(alloc);
+}
+
+/*
+ * Note: this will remove the gk20a_page_alloc struct from the RB tree
+ * if it's found.
+ */
+static void gk20a_page_free(struct gk20a_allocator *__a, u64 base)
+{
+	struct gk20a_page_allocator *a = page_allocator(__a);
+	struct gk20a_page_alloc *alloc;
+
+	alloc_lock(__a);
+
+	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
+		alloc = __find_page_alloc(a, base);
+	else
+		alloc = __find_page_alloc(a,
+			((struct gk20a_page_alloc *)(uintptr_t)base)->base);
+
+	if (!alloc) {
+		palloc_dbg(a, "Hrm, found no alloc?\n");
+		goto done;
+	}
+
+	a->nr_frees++;
+	a->pages_freed += (alloc->length >> a->page_shift);
+
+	/*
+	 * Frees *alloc.
+	 */
+	__gk20a_free_pages(a, alloc);
+
+	palloc_dbg(a, "Free 0x%010llx id=0x%010llx\n",
+		   alloc->length, alloc->base);
+
+done:
+	alloc_unlock(__a);
+}
+
+static struct gk20a_page_alloc *__gk20a_alloc_pages_fixed(
+	struct gk20a_page_allocator *a, u64 base, u64 length)
+{
+	struct gk20a_page_alloc *alloc;
+	struct page_alloc_chunk *c;
+
+	alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
+	c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
+	if (!alloc || !c)
+		goto fail;
+
+	alloc->base = gk20a_alloc_fixed(&a->source_allocator, base, length);
+	if (!alloc->base) {
+		WARN(1, "gk20a: failed to fixed alloc pages @ 0x%010llx", base);
+		goto fail;
+	}
+
+	alloc->nr_chunks = 1;
+	alloc->length = length;
+	INIT_LIST_HEAD(&alloc->alloc_chunks);
+
+	c->base = alloc->base;
+	c->length = length;
+	list_add(&c->list_entry, &alloc->alloc_chunks);
+
+	return alloc;
+
+fail:
+	kfree(c);
+	kfree(alloc);
+	return ERR_PTR(-ENOMEM);
+}
+
+static u64 gk20a_page_alloc_fixed(struct gk20a_allocator *__a,
+				  u64 base, u64 len)
+{
+	struct gk20a_page_allocator *a = page_allocator(__a);
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *c;
+	u64 aligned_len, pages;
+	int i = 0;
+
+	aligned_len = ALIGN(len, a->page_size);
+	pages = aligned_len >> a->page_shift;
+
+	alloc_lock(__a);
+
+	alloc = __gk20a_alloc_pages_fixed(a, base, aligned_len);
+	if (IS_ERR(alloc)) {
+		alloc_unlock(__a);
+		return 0;
+	}
+
+	__insert_page_alloc(a, alloc);
+	alloc_unlock(__a);
+
+	palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
+		   alloc->base, aligned_len, pages);
+	list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
+		palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
+			   i++, c->base, c->length);
+	}
+
+	a->nr_fixed_allocs++;
+	a->pages_alloced += pages;
+
+	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
+		return alloc->base;
+	else
+		return (u64) (uintptr_t) alloc;
+}
+
+static void gk20a_page_free_fixed(struct gk20a_allocator *__a,
+				  u64 base, u64 len)
+{
+	struct gk20a_page_allocator *a = page_allocator(__a);
+	struct gk20a_page_alloc *alloc;
+
+	alloc_lock(__a);
+
+	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
+		alloc = __find_page_alloc(a, base);
+		if (!alloc)
+			goto done;
+	} else {
+		alloc = (struct gk20a_page_alloc *) (uintptr_t) base;
+	}
+
+	/*
+	 * This works for the time being since the buddy allocator
+	 * uses the same free function for both fixed and regular
+	 * allocs. This would have to be updated if the underlying
+	 * allocator were to change.
+	 */
+	__gk20a_free_pages(a, alloc);
+
+	palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
+		   alloc->base, alloc->length);
+	a->nr_fixed_frees++;
+	a->pages_freed += (alloc->length >> a->page_shift);
+
+done:
+	alloc_unlock(__a);
+}
+
+static void gk20a_page_allocator_destroy(struct gk20a_allocator *__a)
+{
+	struct gk20a_page_allocator *a = page_allocator(__a);
+
+	alloc_lock(__a);
+	kfree(a);
+	__a->priv = NULL;
+	alloc_unlock(__a);
+}
+
+static void gk20a_page_print_stats(struct gk20a_allocator *__a,
+				   struct seq_file *s, int lock)
+{
+	struct gk20a_page_allocator *a = page_allocator(__a);
+
+	if (lock)
+		alloc_lock(__a);
+
+	__alloc_pstat(s, __a, "Page allocator:\n");
+	__alloc_pstat(s, __a, " allocs %lld\n", a->nr_allocs);
+	__alloc_pstat(s, __a, " frees %lld\n", a->nr_frees);
+	__alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs);
+	__alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees);
+	__alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced);
+	__alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed);
+	__alloc_pstat(s, __a, "\n");
+	__alloc_pstat(s, __a, "Source alloc: %s\n",
+		      a->source_allocator.name);
+
+	gk20a_alloc_print_stats(&a->source_allocator, s, lock);
+
+	if (lock)
+		alloc_unlock(__a);
+}
+
+static const struct gk20a_allocator_ops page_ops = {
+	.alloc		= gk20a_page_alloc,
+	.free		= gk20a_page_free,
+
+	.alloc_fixed	= gk20a_page_alloc_fixed,
+	.free_fixed	= gk20a_page_free_fixed,
+
+	.base		= gk20a_page_alloc_base,
+	.length		= gk20a_page_alloc_length,
+	.end		= gk20a_page_alloc_end,
+	.inited		= gk20a_page_alloc_inited,
+
+	.fini		= gk20a_page_allocator_destroy,
+
+	.print_stats	= gk20a_page_print_stats,
+};
+
+int gk20a_page_allocator_init(struct gk20a_allocator *__a,
+			      const char *name, u64 base, u64 length,
+			      u64 blk_size, u64 flags)
+{
+	struct gk20a_page_allocator *a;
+	char buddy_name[sizeof(__a->name)];
+	int err;
+
+	mutex_lock(&meta_data_cache_lock);
+	if (!page_alloc_cache)
+		page_alloc_cache = KMEM_CACHE(gk20a_page_alloc, 0);
+	if (!page_alloc_chunk_cache)
+		page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0);
+	mutex_unlock(&meta_data_cache_lock);
+
+	if (!page_alloc_cache || !page_alloc_chunk_cache)
+		return -ENOMEM;
+
+	a = kzalloc(sizeof(struct gk20a_page_allocator), GFP_KERNEL);
+	if (!a)
+		return -ENOMEM;
+
+	err = __gk20a_alloc_common_init(__a, name, a, false, &page_ops);
+	if (err)
+		goto fail;
+
+	a->base = base;
+	a->length = length;
+	a->page_size = blk_size;
+	a->page_shift = __ffs(blk_size);
+	a->allocs = RB_ROOT;
+	a->owner = __a;
+	a->flags = flags;
+
+	snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);
+
+	err = gk20a_buddy_allocator_init(&a->source_allocator, buddy_name, base,
+					 length, blk_size, 0);
+	if (err)
+		goto fail;
+
+	gk20a_init_alloc_debug(__a);
+	palloc_dbg(a, "New allocator: type page\n");
+	palloc_dbg(a, " base 0x%llx\n", a->base);
+	palloc_dbg(a, " size 0x%llx\n", a->length);
+	palloc_dbg(a, " page_size 0x%llx\n", a->page_size);
+	palloc_dbg(a, " flags 0x%llx\n", a->flags);
+
+	return 0;
+
+fail:
+	kfree(a);
+	return err;
+}
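The core of __gk20a_alloc_pages() above is the power-of-two splitting loop:
each pass takes the largest power-of-two number of pages that still fits the
remaining request (1 << __fls(pages)), and only halves a chunk when the buddy
allocator cannot supply it. A stand-alone sketch of just that arithmetic
(ordinary user-space C, not part of the patch; the retry-on-failure path and
the max_chunk_len clamp are omitted):

    /* Illustration only: a 13-page request is carved into 8 + 4 + 1 pages
     * when every buddy allocation succeeds on the first try. */
    #include <stdio.h>
    #include <stdint.h>

    /* Index of the most significant set bit, i.e. what __fls() returns. */
    static unsigned int msb(uint64_t x)
    {
            unsigned int r = 0;

            while (x >>= 1)
                    r++;
            return r;
    }

    int main(void)
    {
            uint64_t pages = 13;

            while (pages) {
                    uint64_t chunk_pages = 1ULL << msb(pages);

                    printf("chunk: %llu pages\n",
                           (unsigned long long)chunk_pages);
                    pages -= chunk_pages;
            }
            return 0;
    }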
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b63444d0..cab10902 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -764,8 +764,10 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
 	if (!size)
 		return 0;

-	err = gk20a_buddy_allocator_init(&g->mm.vidmem.allocator, "vidmem",
-			SZ_4K, size - SZ_4K, SZ_4K, 0);
+	err = gk20a_page_allocator_init(&g->mm.vidmem.allocator, "vidmem",
+			SZ_4K, size - SZ_4K, SZ_4K,
+			GPU_ALLOC_FORCE_CONTIG |
+			GPU_ALLOC_NO_SCATTER_GATHER);
 	if (err) {
 		gk20a_err(d, "Failed to register vidmem for size %zu: %d",
 			size, err);
diff --git a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
new file mode 100644
index 00000000..bce5b75e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PAGE_ALLOCATOR_PRIV_H
+#define PAGE_ALLOCATOR_PRIV_H
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
+#include "gk20a_allocator.h"
+
+struct gk20a_allocator;
+
+struct page_alloc_chunk {
+	struct list_head list_entry;
+
+	u64 base;
+	u64 length;
+};
+
+/*
+ * Struct to handle internal management of page allocation. It holds a list
+ * of the chunks of page that make up the overall allocation - much like a
+ * scatter gather table.
+ */
+struct gk20a_page_alloc {
+	struct list_head alloc_chunks;
+
+	int nr_chunks;
+	u64 length;
+
+	/*
+	 * Only useful for the RB tree - since the alloc will have discontiguous
+	 * pages the base is essentially irrelevant except for the fact that it
+	 * is guarenteed to be unique.
+	 */
+	u64 base;
+
+	struct rb_node tree_entry;
+};
+
+struct gk20a_page_allocator {
+	struct gk20a_allocator *owner;	/* Owner of this allocator. */
+
+	/*
+	 * Use a buddy allocator to manage the allocation of the underlying
+	 * pages. This lets us abstract the discontiguous allocation handling
+	 * out of the annoyingly complicated buddy allocator.
+	 */
+	struct gk20a_allocator source_allocator;
+
+	/*
+	 * Page params.
+	 */
+	u64 base;
+	u64 length;
+	u64 page_size;
+	u32 page_shift;
+
+	struct rb_root allocs;		/* Outstanding allocations. */
+
+	u64 flags;
+
+	/*
+	 * Stat tracking.
+	 */
+	u64 nr_allocs;
+	u64 nr_frees;
+	u64 nr_fixed_allocs;
+	u64 nr_fixed_frees;
+	u64 pages_alloced;
+	u64 pages_freed;
+};
+
+static inline struct gk20a_page_allocator *page_allocator(
+	struct gk20a_allocator *a)
+{
+	return (struct gk20a_page_allocator *)(a)->priv;
+}
+
+static inline struct gk20a_allocator *palloc_owner(
+	struct gk20a_page_allocator *a)
+{
+	return a->owner;
+}
+
+#endif
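Callers that do not set GPU_ALLOC_NO_SCATTER_GATHER get back a struct
gk20a_page_alloc and are expected to walk its chunk list much as they would a
scatter-gather table. A minimal sketch of such a walk (illustrative only;
map_each_chunk is not part of this patch):

    /* Illustrative only: visit each physically contiguous chunk of an
     * allocation, e.g. to program GMMU mappings one chunk at a time. */
    static void map_each_chunk(struct gk20a_page_alloc *alloc)
    {
            struct page_alloc_chunk *c;

            list_for_each_entry(c, &alloc->alloc_chunks, list_entry)
                    pr_info("chunk: 0x%llx + 0x%llx\n", c->base, c->length);
    }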