Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c  457
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c                7
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/page_allocator_priv.h    69
 3 files changed, 486 insertions, 47 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
index 2e5d46b9..b8e38620 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/bitops.h>
+#include <linux/mm.h>
 
 #include "gk20a_allocator.h"
 #include "buddy_allocator_priv.h"
@@ -27,8 +28,59 @@
 
 static struct kmem_cache *page_alloc_cache;
 static struct kmem_cache *page_alloc_chunk_cache;
+static struct kmem_cache *page_alloc_slab_page_cache;
 static DEFINE_MUTEX(meta_data_cache_lock);
 
+/*
+ * Handle the book-keeping for these operations.
+ */
+static inline void add_slab_page_to_empty(struct page_alloc_slab *slab,
+					  struct page_alloc_slab_page *page)
+{
+	BUG_ON(page->state != SP_NONE);
+	list_add(&page->list_entry, &slab->empty);
+	slab->nr_empty++;
+	page->state = SP_EMPTY;
+}
+static inline void add_slab_page_to_partial(struct page_alloc_slab *slab,
+					    struct page_alloc_slab_page *page)
+{
+	BUG_ON(page->state != SP_NONE);
+	list_add(&page->list_entry, &slab->partial);
+	slab->nr_partial++;
+	page->state = SP_PARTIAL;
+}
+static inline void add_slab_page_to_full(struct page_alloc_slab *slab,
+					 struct page_alloc_slab_page *page)
+{
+	BUG_ON(page->state != SP_NONE);
+	list_add(&page->list_entry, &slab->full);
+	slab->nr_full++;
+	page->state = SP_FULL;
+}
+
+static inline void del_slab_page_from_empty(struct page_alloc_slab *slab,
+					    struct page_alloc_slab_page *page)
+{
+	list_del_init(&page->list_entry);
+	slab->nr_empty--;
+	page->state = SP_NONE;
+}
+static inline void del_slab_page_from_partial(struct page_alloc_slab *slab,
+					      struct page_alloc_slab_page *page)
+{
+	list_del_init(&page->list_entry);
+	slab->nr_partial--;
+	page->state = SP_NONE;
+}
+static inline void del_slab_page_from_full(struct page_alloc_slab *slab,
+					   struct page_alloc_slab_page *page)
+{
+	list_del_init(&page->list_entry);
+	slab->nr_full--;
+	page->state = SP_NONE;
+}
+
 static u64 gk20a_page_alloc_length(struct gk20a_allocator *a)
 {
 	struct gk20a_page_allocator *va = a->priv;
@@ -80,6 +132,26 @@ static void gk20a_page_release_co(struct gk20a_allocator *a,
 	gk20a_alloc_release_carveout(&va->source_allocator, co);
 }
 
+static void __gk20a_free_pages(struct gk20a_page_allocator *a,
+			       struct gk20a_page_alloc *alloc,
+			       bool free_buddy_alloc)
+{
+	struct page_alloc_chunk *chunk;
+
+	while (!list_empty(&alloc->alloc_chunks)) {
+		chunk = list_first_entry(&alloc->alloc_chunks,
+					 struct page_alloc_chunk,
+					 list_entry);
+		list_del(&chunk->list_entry);
+
+		if (free_buddy_alloc)
+			gk20a_free(&a->source_allocator, chunk->base);
+		kfree(chunk);
+	}
+
+	kfree(alloc);
+}
+
 static int __insert_page_alloc(struct gk20a_page_allocator *a,
 			       struct gk20a_page_alloc *alloc)
 {
@@ -134,13 +206,236 @@ static struct gk20a_page_alloc *__find_page_alloc(
 	return alloc;
 }
 
+static struct page_alloc_slab_page *alloc_slab_page(
+	struct gk20a_page_allocator *a,
+	struct page_alloc_slab *slab)
+{
+	struct page_alloc_slab_page *slab_page;
+
+	slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL);
+	if (!slab_page) {
+		palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(slab_page, 0, sizeof(*slab_page));
+
+	slab_page->page_addr = gk20a_alloc(&a->source_allocator, a->page_size);
+	if (!slab_page->page_addr) {
+		kfree(slab_page);
+		palloc_dbg(a, "OOM: vidmem is full!\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	INIT_LIST_HEAD(&slab_page->list_entry);
+	slab_page->slab_size = slab->slab_size;
+	slab_page->nr_objects = a->page_size / slab->slab_size;
+	slab_page->nr_objects_alloced = 0;
+	slab_page->owner = slab;
+	slab_page->state = SP_NONE;
+
+	a->pages_alloced++;
+
+	palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n",
+		   slab_page->page_addr, slab_page->slab_size);
+
+	return slab_page;
+}
+
+static void free_slab_page(struct gk20a_page_allocator *a,
+			   struct page_alloc_slab_page *slab_page)
+{
+	palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr);
+
+	BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) ||
+	       slab_page->nr_objects_alloced != 0 ||
+	       slab_page->bitmap != 0);
+
+	gk20a_free(&a->source_allocator, slab_page->page_addr);
+	a->pages_freed++;
+
+	kmem_cache_free(page_alloc_slab_page_cache, slab_page);
+}
+
+/*
+ * This expects @alloc to have 1 empty page_alloc_chunk already added to the
+ * alloc_chunks list.
+ */
+static int __do_slab_alloc(struct gk20a_page_allocator *a,
+			   struct page_alloc_slab *slab,
+			   struct gk20a_page_alloc *alloc)
+{
+	struct page_alloc_slab_page *slab_page = NULL;
+	struct page_alloc_chunk *chunk;
+	unsigned long offs;
+
+	/*
+	 * Check the partial and empty lists to see if we have some space
+	 * readily available. Take the slab_page out of whatever list it
+	 * was in since it may be put back into a different list later.
+	 */
+	if (!list_empty(&slab->partial)) {
+		slab_page = list_first_entry(&slab->partial,
+					     struct page_alloc_slab_page,
+					     list_entry);
+		del_slab_page_from_partial(slab, slab_page);
+	} else if (!list_empty(&slab->empty)) {
+		slab_page = list_first_entry(&slab->empty,
+					     struct page_alloc_slab_page,
+					     list_entry);
+		del_slab_page_from_empty(slab, slab_page);
+	}
+
+	if (!slab_page) {
+		slab_page = alloc_slab_page(a, slab);
+		if (IS_ERR(slab_page))
+			return PTR_ERR(slab_page);
+	}
+
+	/*
+	 * We now have a slab_page. Do the alloc.
+	 */
+	offs = bitmap_find_next_zero_area(&slab_page->bitmap,
+					  slab_page->nr_objects,
+					  0, 1, 0);
+	if (offs >= slab_page->nr_objects) {
+		WARN(1, "Empty/partial slab with no free objects?");
+
+		/* Add the buggy page to the full list... This isn't ideal. */
+		add_slab_page_to_full(slab, slab_page);
+		return -ENOMEM;
+	}
+
+	bitmap_set(&slab_page->bitmap, offs, 1);
+	slab_page->nr_objects_alloced++;
+
+	if (slab_page->nr_objects_alloced < slab_page->nr_objects)
+		add_slab_page_to_partial(slab, slab_page);
+	else if (slab_page->nr_objects_alloced == slab_page->nr_objects)
+		add_slab_page_to_full(slab, slab_page);
+	else
+		BUG(); /* Should be impossible to hit this. */
+
+	/*
+	 * Handle building the gk20a_page_alloc struct. We expect one
+	 * page_alloc_chunk to be present.
+	 */
+	alloc->slab_page = slab_page;
+	alloc->nr_chunks = 1;
+	alloc->length = slab_page->slab_size;
+	alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
+
+	chunk = list_first_entry(&alloc->alloc_chunks,
+				 struct page_alloc_chunk, list_entry);
+	chunk->base = alloc->base;
+	chunk->length = alloc->length;
+
+	return 0;
+}
+
+/*
+ * Allocate from a slab instead of directly from the page allocator.
+ */
+static struct gk20a_page_alloc *__gk20a_alloc_slab(
+	struct gk20a_page_allocator *a, u64 len)
+{
+	int err, slab_nr;
+	struct page_alloc_slab *slab;
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *chunk = NULL;
+
+	/*
+	 * Align the length to a page and then divide by the page size (4k for
+	 * this code). ilog2() of that then gets us the correct slab to use.
+	 */
+	slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);
+	slab = &a->slabs[slab_nr];
+
+	alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
+	if (!alloc) {
+		palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
+		goto fail;
+	}
+	chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
+	if (!chunk) {
+		palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
+		goto fail;
+	}
+
+	INIT_LIST_HEAD(&alloc->alloc_chunks);
+	list_add(&chunk->list_entry, &alloc->alloc_chunks);
+
+	err = __do_slab_alloc(a, slab, alloc);
+	if (err)
+		goto fail;
+
+	palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n",
+		   len, slab_nr, alloc->base);
+	a->nr_slab_allocs++;
+
+	return alloc;
+
+fail:
+	kfree(alloc);
+	kfree(chunk);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void __gk20a_free_slab(struct gk20a_page_allocator *a,
+			      struct gk20a_page_alloc *alloc)
+{
+	struct page_alloc_slab_page *slab_page = alloc->slab_page;
+	struct page_alloc_slab *slab = slab_page->owner;
+	enum slab_page_state new_state;
+	int offs;
+
+	offs = (alloc->base - slab_page->page_addr) / slab_page->slab_size;
+	bitmap_clear(&slab_page->bitmap, offs, 1);
+
+	slab_page->nr_objects_alloced--;
+
+	if (slab_page->nr_objects_alloced == 0)
+		new_state = SP_EMPTY;
+	else
+		new_state = SP_PARTIAL;
+
+	/*
+	 * Need to migrate the page to a different list.
+	 */
+	if (new_state != slab_page->state) {
+		/* Delete - can't be in empty. */
+		if (slab_page->state == SP_PARTIAL)
+			del_slab_page_from_partial(slab, slab_page);
+		else
+			del_slab_page_from_full(slab, slab_page);
+
+		/* And add. */
+		if (new_state == SP_EMPTY) {
+			if (list_empty(&slab->empty))
+				add_slab_page_to_empty(slab, slab_page);
+			else
+				free_slab_page(a, slab_page);
+		} else {
+			add_slab_page_to_partial(slab, slab_page);
+		}
+	}
+
+	/*
+	 * Now handle the page_alloc.
+	 */
+	__gk20a_free_pages(a, alloc, false);
+	a->nr_slab_frees++;
+
+	return;
+}
+
 /*
  * Allocate physical pages. Since the underlying allocator is a buddy allocator
  * the returned pages are always contiguous. However, since there could be
  * fragmentation in the space this allocator will collate smaller non-contiguous
  * allocations together if necessary.
  */
-static struct gk20a_page_alloc *__gk20a_alloc_pages(
+static struct gk20a_page_alloc *__do_gk20a_alloc_pages(
 		struct gk20a_page_allocator *a, u64 pages)
 {
 	struct gk20a_page_alloc *alloc;
@@ -152,6 +447,8 @@ static struct gk20a_page_alloc *__gk20a_alloc_pages(
 	if (!alloc)
 		goto fail;
 
+	memset(alloc, 0, sizeof(*alloc));
+
 	INIT_LIST_HEAD(&alloc->alloc_chunks);
 	alloc->length = pages << a->page_shift;
 
@@ -233,6 +530,33 @@ fail:
 	return ERR_PTR(-ENOMEM);
 }
 
+static struct gk20a_page_alloc *__gk20a_alloc_pages(
+	struct gk20a_page_allocator *a, u64 len)
+{
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *c;
+	u64 pages;
+	int i = 0;
+
+	pages = ALIGN(len, a->page_size) >> a->page_shift;
+
+	alloc = __do_gk20a_alloc_pages(a, pages);
+	if (IS_ERR(alloc)) {
+		palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
+			   pages << a->page_shift, pages);
+		return NULL;
+	}
+
+	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
+		   pages << a->page_shift, pages, alloc->base);
+	list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
+		palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
+			   i++, c->base, c->length);
+	}
+
+	return alloc;
+}
+
 /*
  * Allocate enough pages to satisfy @len. Page size is determined at
  * initialization of the allocator.
@@ -247,10 +571,7 @@ static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
 {
 	struct gk20a_page_allocator *a = page_allocator(__a);
 	struct gk20a_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *c;
 	u64 real_len;
-	u64 pages;
-	int i = 0;
 
 	/*
 	 * If we want contig pages we have to round up to a power of two. It's
@@ -259,30 +580,24 @@ static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
 	real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
 		roundup_pow_of_two(len) : len;
 
-	pages = ALIGN(real_len, a->page_size) >> a->page_shift;
-
 	alloc_lock(__a);
+	if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES &&
+	    real_len <= (a->page_size / 2))
+		alloc = __gk20a_alloc_slab(a, real_len);
+	else
+		alloc = __gk20a_alloc_pages(a, real_len);
 
-	alloc = __gk20a_alloc_pages(a, pages);
-	if (IS_ERR(alloc)) {
+	if (!alloc) {
 		alloc_unlock(__a);
-		palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
-			   pages << a->page_shift, pages);
 		return 0;
 	}
 
 	__insert_page_alloc(a, alloc);
-	alloc_unlock(__a);
-
-	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
-		   pages << a->page_shift, pages, alloc->base);
-	list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
-		palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
-			   i++, c->base, c->length);
-	}
 
 	a->nr_allocs++;
-	a->pages_alloced += pages;
+	if (real_len > a->page_size / 2)
+		a->pages_alloced += alloc->length >> a->page_shift;
+	alloc_unlock(__a);
 
 	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
 		return alloc->base;
@@ -290,24 +605,6 @@ static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
 	return (u64) (uintptr_t) alloc;
 }
 
-static void __gk20a_free_pages(struct gk20a_page_allocator *a,
-			       struct gk20a_page_alloc *alloc)
-{
-	struct page_alloc_chunk *chunk;
-
-	while (!list_empty(&alloc->alloc_chunks)) {
-		chunk = list_first_entry(&alloc->alloc_chunks,
-					 struct page_alloc_chunk,
-					 list_entry);
-		list_del(&chunk->list_entry);
-
-		gk20a_free(&a->source_allocator, chunk->base);
-		kfree(chunk);
-	}
-
-	kfree(alloc);
-}
-
 /*
  * Note: this will remove the gk20a_page_alloc struct from the RB tree
  * if it's found.
@@ -331,14 +628,18 @@ static void gk20a_page_free(struct gk20a_allocator *__a, u64 base)
 	}
 
 	a->nr_frees++;
-	a->pages_freed += (alloc->length >> a->page_shift);
 
 	/*
 	 * Frees *alloc.
 	 */
-	__gk20a_free_pages(a, alloc);
+	if (alloc->slab_page) {
+		__gk20a_free_slab(a, alloc);
+	} else {
+		a->pages_freed += (alloc->length >> a->page_shift);
+		__gk20a_free_pages(a, alloc, true);
+	}
 
-	palloc_dbg(a, "Free 0x%010llx id=0x%010llx\n",
+	palloc_dbg(a, "Free 0x%llx id=0x%010llx\n",
 		   alloc->length, alloc->base);
 
 done:
@@ -439,7 +740,7 @@ static void gk20a_page_free_fixed(struct gk20a_allocator *__a,
 	 * allocs. This would have to be updated if the underlying
 	 * allocator were to change.
 	 */
-	__gk20a_free_pages(a, alloc);
+	__gk20a_free_pages(a, alloc, true);
 
 	palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
 		   alloc->base, alloc->length);
@@ -464,6 +765,7 @@ static void gk20a_page_print_stats(struct gk20a_allocator *__a,
 		struct seq_file *s, int lock)
 {
 	struct gk20a_page_allocator *a = page_allocator(__a);
+	int i;
 
 	if (lock)
 		alloc_lock(__a);
@@ -473,12 +775,33 @@ static void gk20a_page_print_stats(struct gk20a_allocator *__a,
 	__alloc_pstat(s, __a, " frees %lld\n", a->nr_frees);
 	__alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs);
 	__alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees);
+	__alloc_pstat(s, __a, " slab_allocs %lld\n", a->nr_slab_allocs);
+	__alloc_pstat(s, __a, " slab_frees %lld\n", a->nr_slab_frees);
 	__alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced);
 	__alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed);
 	__alloc_pstat(s, __a, "\n");
+
+	/*
+	 * Slab info.
+	 */
+	if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
+		__alloc_pstat(s, __a, "Slabs:\n");
+		__alloc_pstat(s, __a, " size empty partial full\n");
+		__alloc_pstat(s, __a, " ---- ----- ------- ----\n");
+
+		for (i = 0; i < a->nr_slabs; i++) {
+			struct page_alloc_slab *slab = &a->slabs[i];
+
+			__alloc_pstat(s, __a, " %-9u %-9d %-9u %u\n",
+				      slab->slab_size,
+				      slab->nr_empty, slab->nr_partial,
+				      slab->nr_full);
+		}
+		__alloc_pstat(s, __a, "\n");
+	}
+
 	__alloc_pstat(s, __a, "Source alloc: %s\n",
 		      a->source_allocator.name);
-
 	gk20a_alloc_print_stats(&a->source_allocator, s, lock);
 
 	if (lock)
@@ -506,6 +829,43 @@ static const struct gk20a_allocator_ops page_ops = {
 	.print_stats = gk20a_page_print_stats,
 };
 
+/*
+ * nr_slabs is computed as follows: divide page_size by 4096 to get the number
+ * of 4k pages in page_size. Then take the base 2 log of that to get the number
+ * of slabs. For a 64k page_size that works out like this:
+ *
+ *   1024*64 / 1024*4 = 16
+ *   ilog2(16) = 4
+ *
+ * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
+ */
+static int gk20a_page_alloc_init_slabs(struct gk20a_page_allocator *a)
+{
+	size_t nr_slabs = ilog2(a->page_size >> 12);
+	int i;
+
+	a->slabs = kcalloc(nr_slabs,
+			   sizeof(struct page_alloc_slab),
+			   GFP_KERNEL);
+	if (!a->slabs)
+		return -ENOMEM;
+	a->nr_slabs = nr_slabs;
+
+	for (i = 0; i < nr_slabs; i++) {
+		struct page_alloc_slab *slab = &a->slabs[i];
+
+		slab->slab_size = SZ_4K * (1 << i);
+		INIT_LIST_HEAD(&slab->empty);
+		INIT_LIST_HEAD(&slab->partial);
+		INIT_LIST_HEAD(&slab->full);
+		slab->nr_empty = 0;
+		slab->nr_partial = 0;
+		slab->nr_full = 0;
+	}
+
+	return 0;
+}
+
 int gk20a_page_allocator_init(struct gk20a_allocator *__a,
 			      const char *name, u64 base, u64 length,
 			      u64 blk_size, u64 flags)
@@ -519,11 +879,17 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
 		page_alloc_cache = KMEM_CACHE(gk20a_page_alloc, 0);
 	if (!page_alloc_chunk_cache)
 		page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0);
+	if (!page_alloc_slab_page_cache)
+		page_alloc_slab_page_cache =
+			KMEM_CACHE(page_alloc_slab_page, 0);
 	mutex_unlock(&meta_data_cache_lock);
 
 	if (!page_alloc_cache || !page_alloc_chunk_cache)
 		return -ENOMEM;
 
+	if (blk_size < SZ_4K)
+		return -EINVAL;
+
 	a = kzalloc(sizeof(struct gk20a_page_allocator), GFP_KERNEL);
 	if (!a)
 		return -ENOMEM;
@@ -540,6 +906,12 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
 	a->owner = __a;
 	a->flags = flags;
 
+	if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) {
+		err = gk20a_page_alloc_init_slabs(a);
+		if (err)
+			goto fail;
+	}
+
 	snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);
 
 	err = gk20a_buddy_allocator_init(&a->source_allocator, buddy_name, base,
@@ -553,6 +925,7 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
 	palloc_dbg(a, " size 0x%llx\n", a->length);
 	palloc_dbg(a, " page_size 0x%llx\n", a->page_size);
 	palloc_dbg(a, " flags 0x%llx\n", a->flags);
+	palloc_dbg(a, " slabs: %d\n", a->nr_slabs);
 
 	return 0;
 
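Note (editorial, not part of the patch): the slab path above hinges on two small pieces of arithmetic. With GPU_ALLOC_4K_VIDMEM_PAGES set, gk20a_page_alloc() routes any request of at most half a large page to __gk20a_alloc_slab(), which picks its bucket via ilog2(PAGE_ALIGN(len) >> 12). The stand-alone user-space C sketch below mirrors that selection for a 64k large page; ALIGN_UP(), ilog2_u64() and slab_index_for_len() are illustrative names, not driver symbols.

/*
 * Illustrative sketch of the slab bucket selection and the slab-vs-page
 * decision described above. Compile with any C compiler; nothing here is
 * kernel code.
 */
#include <stdio.h>
#include <stdint.h>

#define SZ_4K		4096ULL
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

/* Same result as the kernel's ilog2() for the values used here. */
static unsigned int ilog2_u64(uint64_t v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

/* Mirrors: slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12); */
static unsigned int slab_index_for_len(uint64_t len)
{
	return ilog2_u64(ALIGN_UP(len, SZ_4K) >> 12);
}

int main(void)
{
	const uint64_t page_size = 64 * 1024;	/* large vidmem page */
	const uint64_t lens[] = { 100, 4096, 8192, 16384, 32768, 65536 };
	size_t i;

	for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
		uint64_t len = lens[i];

		/* gk20a_page_alloc(): slab path only for len <= page_size/2. */
		if (len <= page_size / 2) {
			unsigned int nr = slab_index_for_len(len);

			printf("len=%6llu -> slab bucket %u (%lluk objects)\n",
			       (unsigned long long)len, nr,
			       (unsigned long long)((SZ_4K << nr) / 1024));
		} else {
			printf("len=%6llu -> page allocator path\n",
			       (unsigned long long)len);
		}
	}
	return 0;
}

For a 64k page size this yields buckets 0-3 (4k, 8k, 16k, 32k); anything larger than 32k falls through to the buddy-backed page path.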
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 1bacb70a..df58feb2 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -925,8 +925,11 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
 				bootstrap_base, bootstrap_size,
 				SZ_4K, 0);
 
-	err = gk20a_page_allocator_init(&g->mm.vidmem.allocator, "vidmem",
-			base, size - base, default_page_size, 0);
+	err = gk20a_page_allocator_init(&g->mm.vidmem.allocator,
+					"vidmem",
+					base, size - base,
+					default_page_size,
+					GPU_ALLOC_4K_VIDMEM_PAGES);
 	if (err) {
 		gk20a_err(d, "Failed to register vidmem for size %zu: %d",
 			  size, err);
diff --git a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
index 3d4e3c43..7d7f43c2 100644
--- a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
+++ b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
@@ -19,12 +19,63 @@
 
 #include <linux/list.h>
 #include <linux/rbtree.h>
-#include <gk20a/gk20a_allocator.h>
 
 #include "gk20a_allocator.h"
 
 struct gk20a_allocator;
 
+/*
+ * This allocator implements the ability to do SLAB style allocation since the
+ * GPU has two page sizes available - 4k and 64k/128k. When the default
+ * granularity is the large page size (64k/128k) small allocations become very
+ * space inefficient. This is most notable in PDE and PTE blocks which are 4k
+ * in size.
+ *
+ * Thus we need the ability to suballocate in 64k pages. The way we do this for
+ * the GPU is as follows. We have several buckets for sub-64K allocations:
+ *
+ *   B0 - 4k
+ *   B1 - 8k
+ *   B2 - 16k
+ *   B3 - 32k
+ *   B4 - 64k (for when large pages are 128k)
+ *
+ * When an allocation comes in for less than the large page size (from now on
+ * assumed to be 64k) the allocation is satisfied by one of the buckets.
+ */
+struct page_alloc_slab {
+	struct list_head empty;
+	struct list_head partial;
+	struct list_head full;
+
+	int nr_empty;
+	int nr_partial;
+	int nr_full;
+
+	u32 slab_size;
+};
+
+enum slab_page_state {
+	SP_EMPTY,
+	SP_PARTIAL,
+	SP_FULL,
+	SP_NONE
+};
+
+struct page_alloc_slab_page {
+	unsigned long bitmap;
+	u64 page_addr;
+	u32 slab_size;
+
+	u32 nr_objects;
+	u32 nr_objects_alloced;
+
+	enum slab_page_state state;
+
+	struct page_alloc_slab *owner;
+	struct list_head list_entry;
+};
+
 struct page_alloc_chunk {
 	struct list_head list_entry;
 
@@ -34,7 +85,7 @@ struct page_alloc_chunk {
 
 /*
  * Struct to handle internal management of page allocation. It holds a list
- * of the chunks of page that make up the overall allocation - much like a
+ * of the chunks of pages that make up the overall allocation - much like a
  * scatter gather table.
  */
 struct gk20a_page_alloc {
@@ -44,13 +95,20 @@ struct gk20a_page_alloc {
 	u64 length;
 
 	/*
-	 * Only useful for the RB tree - since the alloc will have discontiguous
+	 * Only useful for the RB tree - since the alloc may have discontiguous
 	 * pages the base is essentially irrelevant except for the fact that it
 	 * is guarenteed to be unique.
 	 */
 	u64 base;
 
 	struct rb_node tree_entry;
+
+	/*
+	 * Set if this is a slab alloc. Points back to the slab page that owns
+	 * this particular allocation. nr_chunks will always be 1 if this is
+	 * set.
+	 */
+	struct page_alloc_slab_page *slab_page;
 };
 
 struct gk20a_page_allocator {
@@ -73,6 +131,9 @@ struct gk20a_page_allocator {
 
 	struct rb_root allocs;		/* Outstanding allocations. */
 
+	struct page_alloc_slab *slabs;
+	int nr_slabs;
+
 	u64 flags;
 
 	/*
@@ -82,6 +143,8 @@ struct gk20a_page_allocator {
 	u64 nr_frees;
 	u64 nr_fixed_allocs;
 	u64 nr_fixed_frees;
+	u64 nr_slab_allocs;
+	u64 nr_slab_frees;
 	u64 pages_alloced;
 	u64 pages_freed;
 };
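Note (editorial, not part of the patch): to make the empty/partial/full bookkeeping in page_allocator_priv.h concrete, here is a minimal stand-alone sketch of the slab-page state machine; struct slab_page, sp_alloc() and sp_free() are invented names, not driver symbols. A page with no objects allocated is empty, one with some objects in use is partial, and one whose bitmap is exhausted is full - the same transitions the add_/del_slab_page_* helpers drive in gk20a_allocator_page.c.

/*
 * Stand-alone illustration of the slab page state machine. Compile with any
 * C compiler; nothing here is kernel code.
 */
#include <stdio.h>
#include <stdint.h>

enum sp_state { SP_EMPTY, SP_PARTIAL, SP_FULL, SP_NONE };

struct slab_page {
	uint64_t bitmap;		/* one bit per object, like page_alloc_slab_page */
	unsigned int nr_objects;	/* page_size / slab_size */
	unsigned int nr_alloced;
	enum sp_state state;
};

static enum sp_state state_for(const struct slab_page *p)
{
	if (p->nr_alloced == 0)
		return SP_EMPTY;
	if (p->nr_alloced == p->nr_objects)
		return SP_FULL;
	return SP_PARTIAL;
}

/* Grab the first clear bit, mark it used, and recompute the state. */
static int sp_alloc(struct slab_page *p)
{
	unsigned int i;

	for (i = 0; i < p->nr_objects; i++) {
		if (!(p->bitmap & (1ULL << i))) {
			p->bitmap |= 1ULL << i;
			p->nr_alloced++;
			p->state = state_for(p);
			return (int)i;	/* object index within the page */
		}
	}
	return -1;			/* page is full */
}

static void sp_free(struct slab_page *p, int idx)
{
	p->bitmap &= ~(1ULL << idx);
	p->nr_alloced--;
	p->state = state_for(p);
}

int main(void)
{
	/* A 64k page carved into 16k objects -> 4 objects per page. */
	struct slab_page p = { .nr_objects = 4, .state = SP_NONE };
	int a = sp_alloc(&p);		/* -> SP_PARTIAL */
	int b = sp_alloc(&p);		/* still SP_PARTIAL */

	printf("after 2 allocs: state=%d alloced=%u\n", p.state, p.nr_alloced);

	sp_free(&p, a);
	sp_free(&p, b);			/* back to SP_EMPTY */
	printf("after 2 frees:  state=%d alloced=%u\n", p.state, p.nr_alloced);
	return 0;
}

In the driver the same transitions decide which of the slab's empty/partial/full lists a page sits on, and __gk20a_free_slab() keeps at most one fully-empty page cached before handing pages back to the buddy allocator.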