From b6569319c772d84087a0a1a6d7146bdcae8e9aab Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Fri, 24 Jun 2016 14:12:24 -0700 Subject: gpu: nvgpu: Support multiple types of allocators Support multiple types of allocation backends. Currently there is only one allocator implementation available: a buddy allocator. Buddy allocators have certain limitations though. For one the allocator requires metadata to be allocated from the kernel's system memory. This causes a given buddy allocation to potentially sleep on a kmalloc() call. This patch has been created so that a new backend can be created which will avoid any dynamic system memory management routines from being called. Bug 1781897 Change-Id: I98d6c8402c049942f13fee69c6901a166f177f65 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1172115 GVS: Gerrit_Virtual_Submit Reviewed-by: Konsta Holtta Reviewed-by: Yu-Huan Hsu --- drivers/gpu/nvgpu/gk20a/as_gk20a.c | 9 +- drivers/gpu/nvgpu/gk20a/gk20a_allocator.c | 500 ++++++++++++++++++++---------- drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | 169 +++++++--- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 112 +++---- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 7 +- drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 26 +- drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 22 +- 7 files changed, 554 insertions(+), 291 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index 0571ca1f..8144ec6e 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c @@ -279,16 +279,17 @@ static int gk20a_as_ioctl_get_va_regions( for (i = 0; i < write_entries; ++i) { struct nvgpu_as_va_region region; - struct gk20a_allocator *vma = vm->fixed.init ? + struct gk20a_allocator *vma = + gk20a_alloc_initialized(&vm->fixed) ? &vm->fixed : &vm->vma[i]; memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); region.page_size = vm->gmmu_page_sizes[i]; - region.offset = vma->base; + region.offset = gk20a_alloc_base(vma); /* No __aeabi_uldivmod() on some platforms... */ - region.pages = (vma->end - vma->start) >> - ilog2(region.page_size); + region.pages = (gk20a_alloc_end(vma) - + gk20a_alloc_base(vma)) >> ilog2(region.page_size); if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) return -EFAULT; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c index d3a9202b..f2164768 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c @@ -17,43 +17,58 @@ */ #include -#include #include -#include #include "platform_gk20a.h" #include "gk20a_allocator.h" #include "mm_gk20a.h" -static struct dentry *balloc_debugfs_root; +static struct dentry *gk20a_alloc_debugfs_root; static struct kmem_cache *buddy_cache; /* slab cache for meta data. */ -static u32 balloc_tracing_on; +u32 gk20a_alloc_tracing_on; -#define balloc_trace_func() \ +#define gk20a_alloc_trace_func() \ do { \ - if (balloc_tracing_on) \ + if (gk20a_alloc_tracing_on) \ trace_printk("%s\n", __func__); \ } while (0) -#define balloc_trace_func_done() \ +#define gk20a_alloc_trace_func_done() \ do { \ - if (balloc_tracing_on) \ + if (gk20a_alloc_tracing_on) \ trace_printk("%s_done\n", __func__); \ } while (0) - -static void balloc_init_alloc_debug(struct gk20a_allocator *a); -static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, - int lock); -static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, +/* + * Buddy allocator implementation. 
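+ *
+ * The gk20a_alloc()/gk20a_free() wrappers defined further down dispatch
+ * through a gk20a_allocator_ops table, so callers never name a backend
+ * directly. A rough usage sketch (the sizes and the "demo" name are
+ * illustrative only, not part of this patch):
+ *
+ *   struct gk20a_allocator va;
+ *
+ *   err = gk20a_buddy_allocator_init(&va, "demo", SZ_1M, SZ_16M,
+ *                                    SZ_4K, 0);
+ *   addr = gk20a_alloc(&va, SZ_64K);
+ *   gk20a_free(&va, addr);
+ *   gk20a_alloc_destroy(&va);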
+ */ +static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len); +static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr); +static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a, + u64 base, u64 len); +static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a); +static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a); +static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a); +static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a); + +static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a); +static void gk20a_buddy_print_stats(struct gk20a_allocator *__a, + struct seq_file *s, int lock); + +/* Some other buddy allocator functions. */ +static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, u64 addr); -static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b); -static void __balloc_do_free_fixed(struct gk20a_allocator *a, +static void balloc_coalesce(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b); +static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, struct gk20a_fixed_alloc *falloc); +/* Debugging. */ +static void gk20a_init_alloc_debug(struct gk20a_allocator *a); + /* * This function is not present in older kernel's list.h code. */ @@ -62,6 +77,23 @@ static void __balloc_do_free_fixed(struct gk20a_allocator *a, list_entry((ptr)->prev, type, member) #endif +static const struct gk20a_allocator_ops buddy_ops = { + .alloc = gk20a_buddy_alloc, + .free = gk20a_buddy_free, + + .alloc_fixed = gk20a_buddy_alloc_fixed, + /* .free_fixed not needed. */ + + .base = gk20a_buddy_alloc_base, + .length = gk20a_buddy_alloc_length, + .end = gk20a_buddy_alloc_end, + .inited = gk20a_buddy_alloc_inited, + + .fini = gk20a_buddy_allocator_destroy, + + .print_stats = gk20a_buddy_print_stats, +}; + /* * GPU buddy allocator for various address spaces. * @@ -80,13 +112,95 @@ static void __balloc_do_free_fixed(struct gk20a_allocator *a, * easily PDE aligned so this hasn't been a problem. */ +static u64 gk20a_buddy_alloc_length(struct gk20a_allocator *a) +{ + struct gk20a_buddy_allocator *ba = a->priv; + + return ba->length; +} + +static u64 gk20a_buddy_alloc_base(struct gk20a_allocator *a) +{ + struct gk20a_buddy_allocator *ba = a->priv; + + return ba->start; +} + +static int gk20a_buddy_alloc_inited(struct gk20a_allocator *a) +{ + struct gk20a_buddy_allocator *ba = a->priv; + + return ba->inited; +} +static u64 gk20a_buddy_alloc_end(struct gk20a_allocator *a) +{ + struct gk20a_buddy_allocator *ba = a->priv; + + return ba->end; +} + +u64 gk20a_alloc_length(struct gk20a_allocator *a) +{ + return a->ops->length(a); +} + +u64 gk20a_alloc_base(struct gk20a_allocator *a) +{ + return a->ops->base(a); +} + +u64 gk20a_alloc_initialized(struct gk20a_allocator *a) +{ + if (!a->ops) + return 0; + + return a->ops->inited(a); +} + +u64 gk20a_alloc_end(struct gk20a_allocator *a) +{ + return a->ops->end(a); +} + +u64 gk20a_alloc(struct gk20a_allocator *a, u64 len) +{ + return a->ops->alloc(a, len); +} + +void gk20a_free(struct gk20a_allocator *a, u64 addr) +{ + a->ops->free(a, addr); +} + +u64 gk20a_alloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) +{ + return a->ops->alloc_fixed(a, base, len); +} + +void gk20a_free_fixed(struct gk20a_allocator *a, u64 base, u64 len) +{ + /* + * If this operation is not defined for the allocator then just do + * nothing. The alternative would be to fall back on the regular + * free but that may be harmful in unexpected ways. 
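+ *
+ * With the buddy backend below, for instance, free_fixed is left NULL
+ * because gk20a_buddy_free() already looks up fixed allocs before
+ * falling back to the regular buddy path; for that backend this call
+ * is simply a no-op.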
+ */ + if (a->ops->free_fixed) + a->ops->free_fixed(a, base, len); +} + +void gk20a_alloc_destroy(struct gk20a_allocator *a) +{ + a->ops->fini(a); + memset(a, 0, sizeof(*a)); +} + /* * Pick a suitable maximum order for this allocator. * * Hueristic: Just guessing that the best max order is the largest single * block that will fit in the address space. */ -static void balloc_compute_max_order(struct gk20a_allocator *a) +static void balloc_compute_max_order(struct gk20a_buddy_allocator *a) { u64 true_max_order = ilog2(a->blks); @@ -105,9 +219,10 @@ static void balloc_compute_max_order(struct gk20a_allocator *a) * Since we can only allocate in chucks of a->blk_size we need to trim off * any excess data that is not aligned to a->blk_size. */ -static void balloc_allocator_align(struct gk20a_allocator *a) +static void balloc_allocator_align(struct gk20a_buddy_allocator *a) { a->start = ALIGN(a->base, a->blk_size); + WARN_ON(a->start != a->base); a->end = (a->base + a->length) & ~(a->blk_size - 1); a->count = a->end - a->start; a->blks = a->count >> a->blk_shift; @@ -116,7 +231,7 @@ static void balloc_allocator_align(struct gk20a_allocator *a) /* * Pass NULL for parent if you want a top level buddy. */ -static struct gk20a_buddy *balloc_new_buddy(struct gk20a_allocator *a, +static struct gk20a_buddy *balloc_new_buddy(struct gk20a_buddy_allocator *a, struct gk20a_buddy *parent, u64 start, u64 order) { @@ -136,13 +251,14 @@ static struct gk20a_buddy *balloc_new_buddy(struct gk20a_allocator *a, return new_buddy; } -static void __balloc_buddy_list_add(struct gk20a_allocator *a, +static void __balloc_buddy_list_add(struct gk20a_buddy_allocator *a, struct gk20a_buddy *b, struct list_head *list) { if (buddy_is_in_list(b)) { - balloc_dbg(a, "Oops: adding added buddy (%llu:0x%llx)\n", - b->order, b->start); + alloc_dbg(balloc_owner(a), + "Oops: adding added buddy (%llu:0x%llx)\n", + b->order, b->start); BUG(); } @@ -160,12 +276,13 @@ static void __balloc_buddy_list_add(struct gk20a_allocator *a, buddy_set_in_list(b); } -static void __balloc_buddy_list_rem(struct gk20a_allocator *a, +static void __balloc_buddy_list_rem(struct gk20a_buddy_allocator *a, struct gk20a_buddy *b) { if (!buddy_is_in_list(b)) { - balloc_dbg(a, "Oops: removing removed buddy (%llu:0x%llx)\n", - b->order, b->start); + alloc_dbg(balloc_owner(a), + "Oops: removing removed buddy (%llu:0x%llx)\n", + b->order, b->start); BUG(); } @@ -177,19 +294,21 @@ static void __balloc_buddy_list_rem(struct gk20a_allocator *a, * Add a buddy to one of the buddy lists and deal with the necessary * book keeping. Adds the buddy to the list specified by the buddy's order. 
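 *
 * For example, with blk_size = 4 KB an order 3 buddy covers
 * (1 << 3) * 4 KB = 32 KB and lives on buddy_list[3], with
 * buddy_list_len[3] counting the free buddies of that order.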
*/ -static void balloc_blist_add(struct gk20a_allocator *a, struct gk20a_buddy *b) +static void balloc_blist_add(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) { __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order)); a->buddy_list_len[b->order]++; } -static void balloc_blist_rem(struct gk20a_allocator *a, struct gk20a_buddy *b) +static void balloc_blist_rem(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) { __balloc_buddy_list_rem(a, b); a->buddy_list_len[b->order]--; } -static u64 balloc_get_order(struct gk20a_allocator *a, u64 len) +static u64 balloc_get_order(struct gk20a_buddy_allocator *a, u64 len) { if (len == 0) return 0; @@ -200,7 +319,8 @@ static u64 balloc_get_order(struct gk20a_allocator *a, u64 len) return fls(len); } -static u64 __balloc_max_order_in(struct gk20a_allocator *a, u64 start, u64 end) +static u64 __balloc_max_order_in(struct gk20a_buddy_allocator *a, + u64 start, u64 end) { u64 size = (end - start) >> a->blk_shift; @@ -213,7 +333,7 @@ static u64 __balloc_max_order_in(struct gk20a_allocator *a, u64 start, u64 end) /* * Initialize the buddy lists. */ -static int balloc_init_lists(struct gk20a_allocator *a) +static int balloc_init_lists(struct gk20a_buddy_allocator *a) { int i; u64 bstart, bend, order; @@ -252,6 +372,26 @@ cleanup: return -ENOMEM; } +/* + * Handle the common init stuff for a gk20a_allocator. + */ +static int __gk20a_alloc_common_init(struct gk20a_allocator *a, + const char *name, void *priv, + const struct gk20a_allocator_ops *ops) +{ + if (!ops) + return -EINVAL; + + a->ops = ops; + a->priv = priv; + + mutex_init(&a->lock); + + strlcpy(a->name, name, sizeof(a->name)); + + return 0; +} + /* * Initialize a buddy allocator. Returns 0 on success. This allocator does * not necessarily manage bytes. It manages distinct ranges of resources. This @@ -270,20 +410,40 @@ cleanup: * will try and pick a reasonable max order. * @flags: Extra flags necessary. See GPU_BALLOC_*. */ -int __gk20a_allocator_init(struct gk20a_allocator *a, - struct vm_gk20a *vm, const char *name, - u64 base, u64 size, u64 blk_size, u64 max_order, - u64 flags) +int __gk20a_buddy_allocator_init(struct gk20a_allocator *__a, + struct vm_gk20a *vm, const char *name, + u64 base, u64 size, u64 blk_size, + u64 max_order, u64 flags) { int err; + struct gk20a_buddy_allocator *a; + + /* blk_size must be greater than 0 and a power of 2. */ + if (blk_size == 0) + return -EINVAL; + if (blk_size & (blk_size - 1)) + return -EINVAL; - memset(a, 0, sizeof(struct gk20a_allocator)); - strncpy(a->name, name, 32); + if (max_order > GPU_BALLOC_MAX_ORDER) + return -EINVAL; + + /* If this is to manage a GVA space we need a VM. */ + if (flags & GPU_BALLOC_GVA_SPACE && !vm) + return -EINVAL; + + a = kzalloc(sizeof(struct gk20a_buddy_allocator), GFP_KERNEL); + if (!a) + return -ENOMEM; + + err = __gk20a_alloc_common_init(__a, name, a, &buddy_ops); + if (err) + goto fail; a->base = base; a->length = size; a->blk_size = blk_size; a->blk_shift = __ffs(blk_size); + a->owner = __a; /* * If base is 0 then modfy base to be the size of one block so that we @@ -294,19 +454,6 @@ int __gk20a_allocator_init(struct gk20a_allocator *a, a->length -= a->blk_size; } - /* blk_size must be greater than 0 and a power of 2. */ - if (blk_size == 0) - return -EINVAL; - if (blk_size & (blk_size - 1)) - return -EINVAL; - - if (max_order > GPU_BALLOC_MAX_ORDER) - return -EINVAL; - - /* If this is to manage a GVA space we need a VM. 
*/ - if (flags & GPU_BALLOC_GVA_SPACE && !vm) - return -EINVAL; - a->vm = vm; if (flags & GPU_BALLOC_GVA_SPACE) a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10); @@ -320,49 +467,55 @@ int __gk20a_allocator_init(struct gk20a_allocator *a, /* Shared buddy kmem_cache for all allocators. */ if (!buddy_cache) buddy_cache = KMEM_CACHE(gk20a_buddy, 0); - if (!buddy_cache) - return -ENOMEM; + if (!buddy_cache) { + err = -ENOMEM; + goto fail; + } a->alloced_buddies = RB_ROOT; + a->fixed_allocs = RB_ROOT; err = balloc_init_lists(a); if (err) - return err; - - mutex_init(&a->lock); + goto fail; - a->init = 1; + a->inited = 1; - balloc_init_alloc_debug(a); - balloc_dbg(a, "New allocator: base 0x%llx\n", a->base); - balloc_dbg(a, " size 0x%llx\n", a->length); - balloc_dbg(a, " blk_size 0x%llx\n", a->blk_size); - balloc_dbg(a, " max_order %llu\n", a->max_order); - balloc_dbg(a, " flags 0x%llx\n", a->flags); + gk20a_init_alloc_debug(__a); + alloc_dbg(__a, "New allocator: base 0x%llx\n", a->base); + alloc_dbg(__a, " size 0x%llx\n", a->length); + alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); + alloc_dbg(__a, " max_order %llu\n", a->max_order); + alloc_dbg(__a, " flags 0x%llx\n", a->flags); return 0; + +fail: + kfree(a); + return err; } -int gk20a_allocator_init(struct gk20a_allocator *a, const char *name, - u64 base, u64 size, u64 blk_size) +int gk20a_buddy_allocator_init(struct gk20a_allocator *a, const char *name, + u64 base, u64 size, u64 blk_size, u64 flags) { - return __gk20a_allocator_init(a, NULL, name, - base, size, blk_size, 0, 0); + return __gk20a_buddy_allocator_init(a, NULL, name, + base, size, blk_size, 0, 0); } /* * Clean up and destroy the passed allocator. */ -void gk20a_allocator_destroy(struct gk20a_allocator *a) +static void gk20a_buddy_allocator_destroy(struct gk20a_allocator *__a) { + int i; struct rb_node *node; struct gk20a_buddy *bud; struct gk20a_fixed_alloc *falloc; - int i; + struct gk20a_buddy_allocator *a = __a->priv; - balloc_lock(a); + alloc_lock(__a); - if (!IS_ERR_OR_NULL(a->debugfs_entry)) - debugfs_remove(a->debugfs_entry); + if (!IS_ERR_OR_NULL(__a->debugfs_entry)) + debugfs_remove(__a->debugfs_entry); /* * Free the fixed allocs first. @@ -415,16 +568,9 @@ void gk20a_allocator_destroy(struct gk20a_allocator *a) } } - a->init = 0; + kfree(a); - balloc_unlock(a); - - /* - * We cant unlock an allocator after memsetting it. That wipes the - * state of the mutex. Hopefully no one uses the allocator after - * destroying it... - */ - memset(a, 0, sizeof(struct gk20a_allocator)); + alloc_unlock(__a); } /* @@ -433,7 +579,8 @@ void gk20a_allocator_destroy(struct gk20a_allocator *a) * * @a must be locked. */ -static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b) +static void balloc_coalesce(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) { struct gk20a_buddy *parent; @@ -473,8 +620,8 @@ static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b) * * @a must be locked. */ -static int balloc_split_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b, - int pte_size) +static int balloc_split_buddy(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b, int pte_size) { struct gk20a_buddy *left, *right; u64 half; @@ -521,7 +668,8 @@ static int balloc_split_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b, * * @a must be locked. 
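 *
 * Allocated buddies are stored in the alloced_buddies rb-tree keyed by
 * start address; that is what lets gk20a_buddy_free() map a bare
 * address back to its buddy in O(log n).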
*/ -static void balloc_alloc_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b) +static void balloc_alloc_buddy(struct gk20a_buddy_allocator *a, + struct gk20a_buddy *b) { struct rb_node **new = &(a->alloced_buddies.rb_node); struct rb_node *parent = NULL; @@ -552,7 +700,7 @@ static void balloc_alloc_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b) * * @a must be locked. */ -static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, +static struct gk20a_buddy *balloc_free_buddy(struct gk20a_buddy_allocator *a, u64 addr) { struct rb_node *node = a->alloced_buddies.rb_node; @@ -582,7 +730,7 @@ static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a, /* * Find a suitable buddy for the given order and PTE type (big or little). */ -static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_allocator *a, +static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_buddy_allocator *a, u64 order, int pte_size) { struct gk20a_buddy *bud; @@ -615,7 +763,8 @@ static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_allocator *a, * * @a must be locked. */ -static u64 __balloc_do_alloc(struct gk20a_allocator *a, u64 order, int pte_size) +static u64 __balloc_do_alloc(struct gk20a_buddy_allocator *a, + u64 order, int pte_size) { u64 split_order; struct gk20a_buddy *bud = NULL; @@ -644,21 +793,22 @@ static u64 __balloc_do_alloc(struct gk20a_allocator *a, u64 order, int pte_size) /* * Allocate memory from the passed allocator. */ -u64 gk20a_balloc(struct gk20a_allocator *a, u64 len) +static u64 gk20a_buddy_alloc(struct gk20a_allocator *__a, u64 len) { u64 order, addr; int pte_size; + struct gk20a_buddy_allocator *a = __a->priv; - balloc_trace_func(); + gk20a_alloc_trace_func(); - balloc_lock(a); + alloc_lock(__a); order = balloc_get_order(a, len); if (order > a->max_order) { - balloc_unlock(a); - balloc_dbg(a, "Alloc fail\n"); - balloc_trace_func_done(); + alloc_unlock(__a); + alloc_dbg(balloc_owner(a), "Alloc fail\n"); + gk20a_alloc_trace_func_done(); return 0; } @@ -681,18 +831,19 @@ u64 gk20a_balloc(struct gk20a_allocator *a, u64 len) if (addr) { a->bytes_alloced += len; a->bytes_alloced_real += balloc_order_to_len(a, order); - balloc_dbg(a, "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", - addr, order, len, + alloc_dbg(balloc_owner(a), + "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n", + addr, order, len, pte_size == gmmu_page_size_big ? "big" : pte_size == gmmu_page_size_small ? "small" : "NA/any"); } else { - balloc_dbg(a, "Alloc failed: no mem!\n"); + alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n"); } - balloc_unlock(a); + alloc_unlock(__a); - balloc_trace_func_done(); + gk20a_alloc_trace_func_done(); return addr; } @@ -703,7 +854,8 @@ u64 gk20a_balloc(struct gk20a_allocator *a, u64 len) * TODO: Right now this uses the unoptimal approach of going through all * outstanding allocations and checking their base/ends. This could be better. 
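 *
 * Example: if a buddy spanning [0x4000, 0x8000) is outstanding, a query
 * for [0x6000, 0xa000) overlaps it and the range is reported busy.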
*/ -static int balloc_is_range_free(struct gk20a_allocator *a, u64 base, u64 end) +static int balloc_is_range_free(struct gk20a_buddy_allocator *a, + u64 base, u64 end) { struct rb_node *node; struct gk20a_buddy *bud; @@ -728,7 +880,7 @@ static int balloc_is_range_free(struct gk20a_allocator *a, u64 base, u64 end) return 1; } -static void balloc_alloc_fixed(struct gk20a_allocator *a, +static void balloc_alloc_fixed(struct gk20a_buddy_allocator *a, struct gk20a_fixed_alloc *f) { struct rb_node **new = &(a->fixed_allocs.rb_node); @@ -758,8 +910,8 @@ static void balloc_alloc_fixed(struct gk20a_allocator *a, * * @a must be locked. */ -static struct gk20a_fixed_alloc *balloc_free_fixed(struct gk20a_allocator *a, - u64 addr) +static struct gk20a_fixed_alloc *balloc_free_fixed( + struct gk20a_buddy_allocator *a, u64 addr) { struct rb_node *node = a->fixed_allocs.rb_node; struct gk20a_fixed_alloc *falloc; @@ -788,7 +940,7 @@ static struct gk20a_fixed_alloc *balloc_free_fixed(struct gk20a_allocator *a, * Find the parent range - doesn't necessarily need the parent to actually exist * as a buddy. Finding an existing parent comes later... */ -static void __balloc_get_parent_range(struct gk20a_allocator *a, +static void __balloc_get_parent_range(struct gk20a_buddy_allocator *a, u64 base, u64 order, u64 *pbase, u64 *porder) { @@ -808,8 +960,8 @@ static void __balloc_get_parent_range(struct gk20a_allocator *a, * Makes a buddy at the passed address. This will make all parent buddies * necessary for this buddy to exist as well. */ -static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a, - u64 base, u64 order) +static struct gk20a_buddy *__balloc_make_fixed_buddy( + struct gk20a_buddy_allocator *a, u64 base, u64 order) { struct gk20a_buddy *bud = NULL; struct list_head *order_list; @@ -843,7 +995,7 @@ static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a, } if (cur_order > a->max_order) { - balloc_dbg(a, "No buddy for range ???\n"); + alloc_dbg(balloc_owner(a), "No buddy for range ???\n"); return NULL; } @@ -864,7 +1016,7 @@ static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a, return bud; } -static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a, +static u64 __balloc_do_alloc_fixed(struct gk20a_buddy_allocator *a, struct gk20a_fixed_alloc *falloc, u64 base, u64 len) { @@ -880,7 +1032,8 @@ static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a, __fls(len >> a->blk_shift)); if (align_order > a->max_order) { - balloc_dbg(a, "Align order too big: %llu > %llu\n", + alloc_dbg(balloc_owner(a), + "Align order too big: %llu > %llu\n", align_order, a->max_order); return 0; } @@ -898,7 +1051,8 @@ static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a, balloc_base_unshift(a, inc_base), align_order); if (!bud) { - balloc_dbg(a, "Fixed buddy failed: {0x%llx, %llu}!\n", + alloc_dbg(balloc_owner(a), + "Fixed buddy failed: {0x%llx, %llu}!\n", balloc_base_unshift(a, inc_base), align_order); goto err_and_cleanup; @@ -943,13 +1097,15 @@ err_and_cleanup: * * Please do not use this function unless _absolutely_ necessary. 
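 *
 * Illustrative call sequence (the 512-page size mirrors the semaphore
 * pool mapping elsewhere in this patch; the names are abbreviated):
 *
 *   gpu_va = gk20a_alloc_fixed(vma, va_limit - kernel_size,
 *                              512 * PAGE_SIZE);
 *   ...
 *   gk20a_free(vma, gpu_va);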
*/ -u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) +static u64 gk20a_buddy_alloc_fixed(struct gk20a_allocator *__a, + u64 base, u64 len) { - struct gk20a_fixed_alloc *falloc = NULL; - struct gk20a_buddy *bud; u64 ret, real_bytes = 0; + struct gk20a_buddy *bud; + struct gk20a_fixed_alloc *falloc = NULL; + struct gk20a_buddy_allocator *a = __a->priv; - balloc_trace_func(); + gk20a_alloc_trace_func(); /* If base isn't aligned to an order 0 block, fail. */ if (base & (a->blk_size - 1)) @@ -966,16 +1122,18 @@ u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) falloc->start = base; falloc->end = base + len; - balloc_lock(a); + alloc_lock(__a); if (!balloc_is_range_free(a, base, base + len)) { - balloc_dbg(a, "Range not free: 0x%llx -> 0x%llx\n", + alloc_dbg(balloc_owner(a), + "Range not free: 0x%llx -> 0x%llx\n", base, base + len); goto fail_unlock; } ret = __balloc_do_alloc_fixed(a, falloc, base, len); if (!ret) { - balloc_dbg(a, "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", + alloc_dbg(balloc_owner(a), + "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n", base, base + len); goto fail_unlock; } @@ -988,21 +1146,21 @@ u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len) a->bytes_alloced += len; a->bytes_alloced_real += real_bytes; - balloc_unlock(a); - balloc_dbg(a, "Alloc (fixed) 0x%llx\n", base); + alloc_unlock(__a); + alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base); - balloc_trace_func_done(); + gk20a_alloc_trace_func_done(); return base; fail_unlock: - balloc_unlock(a); + alloc_unlock(__a); fail: kfree(falloc); - balloc_trace_func_done(); + gk20a_alloc_trace_func_done(); return 0; } -static void __balloc_do_free_fixed(struct gk20a_allocator *a, +static void __balloc_do_free_fixed(struct gk20a_buddy_allocator *a, struct gk20a_fixed_alloc *falloc) { struct gk20a_buddy *bud; @@ -1029,19 +1187,20 @@ static void __balloc_do_free_fixed(struct gk20a_allocator *a, /* * Free the passed allocation. */ -void gk20a_bfree(struct gk20a_allocator *a, u64 addr) +static void gk20a_buddy_free(struct gk20a_allocator *__a, u64 addr) { struct gk20a_buddy *bud; struct gk20a_fixed_alloc *falloc; + struct gk20a_buddy_allocator *a = __a->priv; - balloc_trace_func(); + gk20a_alloc_trace_func(); if (!addr) { - balloc_trace_func_done(); + gk20a_alloc_trace_func_done(); return; } - balloc_lock(a); + alloc_lock(__a); /* * First see if this is a fixed alloc. If not fall back to a regular @@ -1066,9 +1225,9 @@ void gk20a_bfree(struct gk20a_allocator *a, u64 addr) balloc_coalesce(a, bud); done: - balloc_unlock(a); - balloc_dbg(a, "Free 0x%llx\n", addr); - balloc_trace_func_done(); + alloc_unlock(__a); + alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr); + gk20a_alloc_trace_func_done(); return; } @@ -1077,49 +1236,42 @@ done: * stats are printed to the kernel log. This lets this code be used for * debugging purposes internal to the allocator. */ -static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, - int lock) +static void gk20a_buddy_print_stats(struct gk20a_allocator *__a, + struct seq_file *s, int lock) { -#define __balloc_pstat(s, fmt, arg...) 
\ - do { \ - if (s) \ - seq_printf(s, fmt, ##arg); \ - else \ - balloc_dbg(a, fmt, ##arg); \ - } while (0) - int i; struct rb_node *node; struct gk20a_fixed_alloc *falloc; + struct gk20a_buddy_allocator *a = __a->priv; - __balloc_pstat(s, "base = %llu, limit = %llu, blk_size = %llu\n", - a->base, a->length, a->blk_size); - __balloc_pstat(s, "Internal params:\n"); - __balloc_pstat(s, " start = 0x%llx\n", a->start); - __balloc_pstat(s, " end = 0x%llx\n", a->end); - __balloc_pstat(s, " count = 0x%llx\n", a->count); - __balloc_pstat(s, " blks = 0x%llx\n", a->blks); - __balloc_pstat(s, " max_order = %llu\n", a->max_order); + __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n", + a->base, a->length, a->blk_size); + __alloc_pstat(s, __a, "Internal params:\n"); + __alloc_pstat(s, __a, " start = 0x%llx\n", a->start); + __alloc_pstat(s, __a, " end = 0x%llx\n", a->end); + __alloc_pstat(s, __a, " count = 0x%llx\n", a->count); + __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks); + __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order); - __balloc_pstat(s, "Buddy blocks:\n"); - __balloc_pstat(s, " Order Free Alloced Split\n"); - __balloc_pstat(s, " ----- ---- ------- -----\n"); + __alloc_pstat(s, __a, "Buddy blocks:\n"); + __alloc_pstat(s, __a, " Order Free Alloced Split\n"); + __alloc_pstat(s, __a, " ----- ---- ------- -----\n"); if (lock) - balloc_lock(a); + alloc_lock(__a); for (i = a->max_order; i >= 0; i--) { if (a->buddy_list_len[i] == 0 && a->buddy_list_alloced[i] == 0 && a->buddy_list_split[i] == 0) continue; - __balloc_pstat(s, " %3d %-7llu %-9llu %llu\n", i, - a->buddy_list_len[i], - a->buddy_list_alloced[i], - a->buddy_list_split[i]); + __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i, + a->buddy_list_len[i], + a->buddy_list_alloced[i], + a->buddy_list_split[i]); } - __balloc_pstat(s, "\n"); + __alloc_pstat(s, __a, "\n"); for (node = rb_first(&a->fixed_allocs), i = 1; node != NULL; @@ -1127,27 +1279,33 @@ static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s, falloc = container_of(node, struct gk20a_fixed_alloc, alloced_entry); - __balloc_pstat(s, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", - i, falloc->start, falloc->end); + __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n", + i, falloc->start, falloc->end); } - __balloc_pstat(s, "\n"); - __balloc_pstat(s, "Bytes allocated: %llu\n", a->bytes_alloced); - __balloc_pstat(s, "Bytes allocated (real): %llu\n", - a->bytes_alloced_real); - __balloc_pstat(s, "Bytes freed: %llu\n", a->bytes_freed); + __alloc_pstat(s, __a, "\n"); + __alloc_pstat(s, __a, "Bytes allocated: %llu\n", + a->bytes_alloced); + __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n", + a->bytes_alloced_real); + __alloc_pstat(s, __a, "Bytes freed: %llu\n", + a->bytes_freed); if (lock) - balloc_unlock(a); + alloc_unlock(__a); +} -#undef __balloc_pstats +void gk20a_alloc_print_stats(struct gk20a_allocator *__a, + struct seq_file *s, int lock) +{ + __a->ops->print_stats(__a, s, lock); } static int __alloc_show(struct seq_file *s, void *unused) { struct gk20a_allocator *a = s->private; - balloc_print_stats(a, s, 1); + gk20a_alloc_print_stats(a, s, 1); return 0; } @@ -1164,13 +1322,13 @@ static const struct file_operations __alloc_fops = { .release = single_release, }; -static void balloc_init_alloc_debug(struct gk20a_allocator *a) +static void gk20a_init_alloc_debug(struct gk20a_allocator *a) { - if (!balloc_debugfs_root) + if (!gk20a_alloc_debugfs_root) return; a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, - 
balloc_debugfs_root, + gk20a_alloc_debugfs_root, a, &__alloc_fops); } @@ -1180,11 +1338,11 @@ void gk20a_alloc_debugfs_init(struct platform_device *pdev) struct gk20a_platform *platform = platform_get_drvdata(pdev); struct dentry *gpu_root = platform->debugfs; - balloc_debugfs_root = debugfs_create_dir("allocators", gpu_root); - if (IS_ERR_OR_NULL(balloc_debugfs_root)) + gk20a_alloc_debugfs_root = debugfs_create_dir("allocators", gpu_root); + if (IS_ERR_OR_NULL(gk20a_alloc_debugfs_root)) return; - debugfs_create_u32("tracing", 0664, balloc_debugfs_root, - &balloc_tracing_on); + debugfs_create_u32("tracing", 0664, gk20a_alloc_debugfs_root, + &gk20a_alloc_tracing_on); } #endif diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h index e86e053b..74e23e6c 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h @@ -20,10 +20,49 @@ #include #include #include +#include #include /* #define ALLOCATOR_DEBUG */ +struct gk20a_allocator; +struct vm_gk20a; + +/* + * Operations for an allocator to implement. + */ +struct gk20a_allocator_ops { + u64 (*alloc)(struct gk20a_allocator *allocator, u64 len); + void (*free)(struct gk20a_allocator *allocator, u64 addr); + + /* + * Special interface to allocate a memory region with a specific + * starting address. Yikes. Note: if free() works for freeing both + * regular and fixed allocations then free_fixed() does not need to + * be implemented. This behavior exists for legacy reasons and should + * not be propagated to new allocators. + */ + u64 (*alloc_fixed)(struct gk20a_allocator *allocator, + u64 base, u64 len); + void (*free_fixed)(struct gk20a_allocator *allocator, + u64 base, u64 len); + + /* + * Returns info about the allocator. + */ + u64 (*base)(struct gk20a_allocator *allocator); + u64 (*length)(struct gk20a_allocator *allocator); + u64 (*end)(struct gk20a_allocator *allocator); + int (*inited)(struct gk20a_allocator *allocator); + + /* Destructor. */ + void (*fini)(struct gk20a_allocator *allocator); + + /* Debugging. */ + void (*print_stats)(struct gk20a_allocator *allocator, + struct seq_file *s, int lock); +}; + /* * Each buddy is an element in a binary tree. */ @@ -97,8 +136,6 @@ struct gk20a_fixed_alloc { u64 end; /* End address. */ }; -struct vm_gk20a; - /* * GPU buddy allocator for the various GPU address spaces. Each addressable unit * doesn't have to correspond to a byte. In some cases each unit is a more @@ -109,12 +146,10 @@ struct vm_gk20a; * * order_size is the size of an order 0 buddy. */ -struct gk20a_allocator { - +struct gk20a_buddy_allocator { + struct gk20a_allocator *owner; /* Owner of this buddy allocator. */ struct vm_gk20a *vm; /* Parent VM - can be NULL. */ - char name[32]; /* Name of allocator. */ - u64 base; /* Base address of the space. */ u64 length; /* Length of the space. */ u64 blk_size; /* Size of order 0 allocation. */ @@ -132,11 +167,6 @@ struct gk20a_allocator { struct rb_root alloced_buddies; /* Outstanding allocations. */ struct rb_root fixed_allocs; /* Outstanding fixed allocations. */ - struct mutex lock; /* Protects buddy access. */ - -#define GPU_BALLOC_GVA_SPACE 0x1 - u64 flags; - /* * Impose an upper bound on the maximum order. 
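 *
 * Example: a 64 GB space of 4 KB blocks holds 2^24 blocks, so
 * ilog2(blks) = 24; balloc_compute_max_order() clamps max_order to
 * that, and the init path rejects anything above GPU_BALLOC_MAX_ORDER.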
*/ @@ -155,52 +185,121 @@ struct gk20a_allocator { */ u64 pte_blk_order; - struct dentry *debugfs_entry; + int inited; + +#define GPU_BALLOC_GVA_SPACE 0x1 + u64 flags; u64 bytes_alloced; u64 bytes_alloced_real; u64 bytes_freed; }; -#define balloc_lock(a) mutex_lock(&(a)->lock) -#define balloc_unlock(a) mutex_unlock(&(a)->lock) +struct gk20a_allocator { + char name[32]; + struct mutex lock; -#define balloc_get_order_list(a, order) (&(a)->buddy_list[(order)]) -#define balloc_order_to_len(a, order) ((1 << order) * (a)->blk_size) -#define balloc_base_shift(a, base) ((base) - (a)->start) -#define balloc_base_unshift(a, base) ((base) + (a)->start) + void *priv; + const struct gk20a_allocator_ops *ops; -int gk20a_allocator_init(struct gk20a_allocator *allocator, - const char *name, u64 base, u64 size, u64 order0); -int __gk20a_allocator_init(struct gk20a_allocator *allocator, - struct vm_gk20a *vm, const char *name, - u64 base, u64 size, u64 order0, - u64 max_order, u64 flags); -void gk20a_allocator_destroy(struct gk20a_allocator *allocator); + struct dentry *debugfs_entry; +}; + +static inline void alloc_lock(struct gk20a_allocator *a) +{ + mutex_lock(&a->lock); +} + +static inline void alloc_unlock(struct gk20a_allocator *a) +{ + mutex_unlock(&a->lock); +} + +static inline struct gk20a_buddy_allocator *buddy_allocator( + struct gk20a_allocator *a) +{ + return (struct gk20a_buddy_allocator *)a->priv; +} + +static inline struct list_head *balloc_get_order_list( + struct gk20a_buddy_allocator *a, int order) +{ + return &a->buddy_list[order]; +} + +static inline u64 balloc_order_to_len(struct gk20a_buddy_allocator *a, + int order) +{ + return (1 << order) * a->blk_size; +} + +static inline u64 balloc_base_shift(struct gk20a_buddy_allocator *a, + u64 base) +{ + return base - a->start; +} + +static inline u64 balloc_base_unshift(struct gk20a_buddy_allocator *a, + u64 base) +{ + return base + a->start; +} + +static inline struct gk20a_allocator *balloc_owner( + struct gk20a_buddy_allocator *a) +{ + return a->owner; +} /* - * Normal alloc/free operations for the buddy allocator. + * Buddy allocator specific initializers. */ -u64 gk20a_balloc(struct gk20a_allocator *allocator, u64 len); -void gk20a_bfree(struct gk20a_allocator *allocator, u64 addr); +int __gk20a_buddy_allocator_init(struct gk20a_allocator *a, + struct vm_gk20a *vm, const char *name, + u64 base, u64 size, u64 blk_size, + u64 max_order, u64 flags); +int gk20a_buddy_allocator_init(struct gk20a_allocator *allocator, + const char *name, u64 base, u64 size, + u64 blk_size, u64 flags); /* - * Special interface to allocate a memory regions with a specific starting - * address. Yikes. + * Allocator APIs. */ -u64 gk20a_balloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len); +u64 gk20a_alloc(struct gk20a_allocator *allocator, u64 len); +void gk20a_free(struct gk20a_allocator *allocator, u64 addr); + +u64 gk20a_alloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len); +void gk20a_free_fixed(struct gk20a_allocator *allocator, u64 base, u64 len); + +u64 gk20a_alloc_base(struct gk20a_allocator *a); +u64 gk20a_alloc_length(struct gk20a_allocator *a); +u64 gk20a_alloc_end(struct gk20a_allocator *a); +u64 gk20a_alloc_initialized(struct gk20a_allocator *a); + +void gk20a_alloc_destroy(struct gk20a_allocator *allocator); + +void gk20a_alloc_print_stats(struct gk20a_allocator *a, + struct seq_file *s, int lock); /* - * Debugfs init. + * Debug stuff. 
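+ *
+ * __alloc_pstat() below prints to the seq_file when one is passed in
+ * (the debugfs path) and otherwise falls back to alloc_dbg(), so a
+ * single print_stats() implementation serves both consumers.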
*/ void gk20a_alloc_debugfs_init(struct platform_device *pdev); +#define __alloc_pstat(seq, allocator, fmt, arg...) \ + do { \ + if (s) \ + seq_printf(seq, fmt, ##arg); \ + else \ + alloc_dbg(allocator, fmt, ##arg); \ + } while (0) + #if defined(ALLOCATOR_DEBUG) -#define balloc_dbg(alloctor, format, arg...) \ +#define alloc_dbg(allocator, format, arg...) \ pr_info("%-25s %25s() " format, \ - alloctor->name, __func__, ##arg) + allocator->name, __func__, ##arg) #else -#define balloc_dbg(allocator, format, arg...) +#define alloc_dbg(allocator, format, arg...) #endif #endif /* GK20A_ALLOCATOR_H */ diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 673aafda..ffc695f5 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -1331,7 +1331,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10); - offset = gk20a_balloc(vma, size); + offset = gk20a_alloc(vma, size); if (!offset) { gk20a_err(dev_from_vm(vm), "%s oom: sz=0x%llx", vma->name, size); @@ -1350,7 +1350,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm, gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx", vma->name, offset, size); - gk20a_bfree(vma, offset); + gk20a_free(vma, offset); return 0; } @@ -3407,12 +3407,12 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) * * !!! TODO: cleanup. */ - sema_sea->gpu_va = gk20a_balloc_fixed(&vm->vma[gmmu_page_size_kernel], - vm->va_limit - - mm->channel.kernel_size, - 512 * PAGE_SIZE); + sema_sea->gpu_va = gk20a_alloc_fixed(&vm->vma[gmmu_page_size_kernel], + vm->va_limit - + mm->channel.kernel_size, + 512 * PAGE_SIZE); if (!sema_sea->gpu_va) { - gk20a_bfree(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va); + gk20a_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va); gk20a_vm_put(vm); return -ENOMEM; } @@ -3420,7 +3420,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) err = gk20a_semaphore_pool_map(vm->sema_pool, vm); if (err) { gk20a_semaphore_pool_unmap(vm->sema_pool, vm); - gk20a_bfree(&vm->vma[gmmu_page_size_small], + gk20a_free(&vm->vma[gmmu_page_size_small], vm->sema_pool->gpu_va); gk20a_vm_put(vm); } @@ -3542,13 +3542,13 @@ int gk20a_init_vm(struct mm_gk20a *mm, snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-fixed", name); - err = __gk20a_allocator_init(&vm->fixed, - vm, alloc_name, - small_vma_start, - g->separate_fixed_allocs, - SZ_4K, - GPU_BALLOC_MAX_ORDER, - GPU_BALLOC_GVA_SPACE); + err = __gk20a_buddy_allocator_init(&vm->fixed, + vm, alloc_name, + small_vma_start, + g->separate_fixed_allocs, + SZ_4K, + GPU_BALLOC_MAX_ORDER, + GPU_BALLOC_GVA_SPACE); if (err) goto clean_up_ptes; @@ -3559,13 +3559,14 @@ int gk20a_init_vm(struct mm_gk20a *mm, if (small_vma_start < small_vma_limit) { snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); - err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], - vm, alloc_name, - small_vma_start, - small_vma_limit - small_vma_start, - SZ_4K, - GPU_BALLOC_MAX_ORDER, - GPU_BALLOC_GVA_SPACE); + err = __gk20a_buddy_allocator_init( + &vm->vma[gmmu_page_size_small], + vm, alloc_name, + small_vma_start, + small_vma_limit - small_vma_start, + SZ_4K, + GPU_BALLOC_MAX_ORDER, + GPU_BALLOC_GVA_SPACE); if (err) goto clean_up_ptes; } @@ -3573,13 +3574,14 @@ int gk20a_init_vm(struct mm_gk20a *mm, if (large_vma_start < large_vma_limit) { snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 
10); - err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], - vm, alloc_name, - large_vma_start, - large_vma_limit - large_vma_start, - big_page_size, - GPU_BALLOC_MAX_ORDER, - GPU_BALLOC_GVA_SPACE); + err = __gk20a_buddy_allocator_init( + &vm->vma[gmmu_page_size_big], + vm, alloc_name, + large_vma_start, + large_vma_limit - large_vma_start, + big_page_size, + GPU_BALLOC_MAX_ORDER, + GPU_BALLOC_GVA_SPACE); if (err) goto clean_up_small_allocator; } @@ -3589,13 +3591,13 @@ int gk20a_init_vm(struct mm_gk20a *mm, /* * kernel reserved VMA is at the end of the aperture */ - err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel], - vm, alloc_name, - kernel_vma_start, - kernel_vma_limit - kernel_vma_start, - SZ_4K, - GPU_BALLOC_MAX_ORDER, - GPU_BALLOC_GVA_SPACE); + err = __gk20a_buddy_allocator_init(&vm->vma[gmmu_page_size_kernel], + vm, alloc_name, + kernel_vma_start, + kernel_vma_limit - kernel_vma_start, + SZ_4K, + GPU_BALLOC_MAX_ORDER, + GPU_BALLOC_GVA_SPACE); if (err) goto clean_up_big_allocator; @@ -3620,10 +3622,10 @@ int gk20a_init_vm(struct mm_gk20a *mm, clean_up_big_allocator: if (large_vma_start < large_vma_limit) - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); clean_up_small_allocator: if (small_vma_start < small_vma_limit) - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); clean_up_ptes: free_gmmu_pages(vm, &vm->pdb); clean_up_pdes: @@ -3730,15 +3732,15 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, vma = &vm->vma[pgsz_idx]; if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) { - if (vm->fixed.init) + if (gk20a_alloc_initialized(&vm->fixed)) vma = &vm->fixed; - vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset, - (u64)args->pages * - (u64)args->page_size); + vaddr_start = gk20a_alloc_fixed(vma, args->o_a.offset, + (u64)args->pages * + (u64)args->page_size); } else { - vaddr_start = gk20a_balloc(vma, - (u64)args->pages * - (u64)args->page_size); + vaddr_start = gk20a_alloc(vma, + (u64)args->pages * + (u64)args->page_size); } if (!vaddr_start) { @@ -3772,7 +3774,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, APERTURE_INVALID); if (!map_offset) { mutex_unlock(&vm->update_gmmu_lock); - gk20a_bfree(vma, vaddr_start); + gk20a_free(vma, vaddr_start); kfree(va_node); goto clean_up; } @@ -3807,11 +3809,11 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? 
gmmu_page_size_big : gmmu_page_size_small; - if (vm->fixed.init) + if (gk20a_alloc_initialized(&vm->fixed)) vma = &vm->fixed; else vma = &vm->vma[pgsz_idx]; - gk20a_bfree(vma, args->offset); + gk20a_free(vma, args->offset); mutex_lock(&vm->update_gmmu_lock); va_node = addr_to_reservation(vm, args->offset); @@ -3995,13 +3997,13 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, void gk20a_deinit_vm(struct vm_gk20a *vm) { - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]); - if (vm->vma[gmmu_page_size_big].init) - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); - if (vm->vma[gmmu_page_size_small].init) - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); - if (vm->fixed.init) - gk20a_allocator_destroy(&vm->fixed); + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); + if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_big])) + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); + if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_small])) + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); + if (gk20a_alloc_initialized(&vm->fixed)) + gk20a_alloc_destroy(&vm->fixed); gk20a_vm_free_entries(vm, &vm->pdb, 0); } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 2e9172c7..66e46480 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -424,12 +424,13 @@ static inline u64 __nv_gmmu_va_small_page_limit(void) static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr) { + struct gk20a_allocator *a = &vm->vma[gmmu_page_size_big]; + if (!vm->big_pages) return 0; - return addr >= vm->vma[gmmu_page_size_big].base && - addr < vm->vma[gmmu_page_size_big].base + - vm->vma[gmmu_page_size_big].length; + return addr >= gk20a_alloc_base(a) && + addr < gk20a_alloc_base(a) + gk20a_alloc_length(a); } /* diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 347d7158..a3898993 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -2856,8 +2856,8 @@ void gk20a_remove_pmu_support(struct pmu_gk20a *pmu) { gk20a_dbg_fn(""); - if (pmu->dmem.init) - gk20a_allocator_destroy(&pmu->dmem); + if (gk20a_alloc_initialized(&pmu->dmem)) + gk20a_alloc_destroy(&pmu->dmem); } static int gk20a_init_pmu_reset_enable_hw(struct gk20a *g) @@ -3503,7 +3503,7 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu) gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data); if (!pmu->sample_buffer) - pmu->sample_buffer = gk20a_balloc(&pmu->dmem, + pmu->sample_buffer = gk20a_alloc(&pmu->dmem, 2 * sizeof(u16)); if (!pmu->sample_buffer) { gk20a_err(dev_from_gk20a(g), @@ -3605,7 +3605,7 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu, for (i = 0; i < PMU_QUEUE_COUNT; i++) pmu_queue_init(pmu, i, init); - if (!pmu->dmem.init) { + if (!gk20a_alloc_initialized(&pmu->dmem)) { /* Align start and end addresses */ u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init), PMU_DMEM_ALLOC_ALIGNMENT); @@ -3613,9 +3613,9 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu, pv->get_pmu_init_msg_pmu_sw_mg_size(init)) & ~(PMU_DMEM_ALLOC_ALIGNMENT - 1); u32 size = end - start; - __gk20a_allocator_init(&pmu->dmem, NULL, "gk20a_pmu_dmem", - start, size, - PMU_DMEM_ALLOC_ALIGNMENT, 4, 0); + gk20a_buddy_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", + start, size, + PMU_DMEM_ALLOC_ALIGNMENT, 0); } pmu->pmu_ready = true; @@ -3752,12 +3752,12 @@ static int pmu_response_handle(struct pmu_gk20a *pmu, seq->callback = NULL; if 
(pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_in_a_ptr(seq)) != 0) - gk20a_bfree(&pmu->dmem, + gk20a_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_in_a_ptr(seq))); if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)) != 0) - gk20a_bfree(&pmu->dmem, + gk20a_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_out_a_ptr(seq))); @@ -4418,7 +4418,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, (u16)max(payload->in.size, payload->out.size)); *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = - gk20a_balloc(&pmu->dmem, + gk20a_alloc(&pmu->dmem, pv->pmu_allocation_get_dmem_size(pmu, in)); if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in))) goto clean_up; @@ -4443,7 +4443,7 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, if (payload->out.buf != payload->in.buf) { *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) = - gk20a_balloc(&pmu->dmem, + gk20a_alloc(&pmu->dmem, pv->pmu_allocation_get_dmem_size(pmu, out)); if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, out))) @@ -4474,10 +4474,10 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, clean_up: gk20a_dbg_fn("fail"); if (in) - gk20a_bfree(&pmu->dmem, + gk20a_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, in)); if (out) - gk20a_bfree(&pmu->dmem, + gk20a_free(&pmu->dmem, pv->pmu_allocation_get_dmem_offset(pmu, out)); pmu_seq_release(pmu, seq); diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 2239fcbc..c6f42703 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -230,11 +230,11 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm) err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); WARN_ON(err || msg.ret); - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]); - if (vm->vma[gmmu_page_size_small].init) - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); - if (vm->vma[gmmu_page_size_big].init) - gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_kernel]); + if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_small])) + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]); + if (gk20a_alloc_initialized(&vm->vma[gmmu_page_size_big])) + gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]); mutex_unlock(&vm->update_gmmu_lock); @@ -374,7 +374,8 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, gmmu_page_sizes[gmmu_page_size_small] >> 10); - err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], + err = __gk20a_buddy_allocator_init( + &vm->vma[gmmu_page_size_small], vm, name, small_vma_start, small_vma_limit - small_vma_start, @@ -388,7 +389,8 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, if (large_vma_start < large_vma_limit) { snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, gmmu_page_sizes[gmmu_page_size_big] >> 10); - err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], + err = __gk20a_buddy_allocator_init( + &vm->vma[gmmu_page_size_big], vm, name, large_vma_start, large_vma_limit - large_vma_start, @@ -404,7 +406,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, /* * kernel reserved VMA is at the end of the aperture */ - err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel], + err = __gk20a_buddy_allocator_init(&vm->vma[gmmu_page_size_kernel], vm, name, kernel_vma_start, kernel_vma_limit - kernel_vma_start, @@ 
-426,10 +428,10 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 
 clean_up_big_allocator:
 	if (large_vma_start < large_vma_limit)
-		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
+		gk20a_alloc_destroy(&vm->vma[gmmu_page_size_big]);
 clean_up_small_allocator:
 	if (small_vma_start < small_vma_limit)
-		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+		gk20a_alloc_destroy(&vm->vma[gmmu_page_size_small]);
 clean_up_share:
 	msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
 	msg.handle = platform->virt_handle;
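
Postscript example: the point of the ops table is that a backend which never
touches the kernel's dynamic memory allocators on its alloc path can now be
dropped in. The sketch below is hypothetical (every "bump_" name is invented
for illustration; only the gk20a_allocator_ops hooks and the lock helpers
come from this patch):

	/* A toy backend: no kmalloc()/kmem_cache_alloc() at alloc time. */
	struct gk20a_bump_allocator {
		u64 base;	/* First managed address. */
		u64 end;	/* One past the last managed address. */
		u64 next;	/* Next address to hand out. */
	};

	static u64 gk20a_bump_alloc(struct gk20a_allocator *__a, u64 len)
	{
		struct gk20a_bump_allocator *ba = __a->priv;
		u64 addr = 0;

		alloc_lock(__a);
		if (len && ba->next + len <= ba->end) {
			addr = ba->next;
			ba->next += len;
		}
		alloc_unlock(__a);

		return addr;
	}

	static void gk20a_bump_free(struct gk20a_allocator *__a, u64 addr)
	{
		/* Individual frees are unsupported in this toy backend. */
	}

	static const struct gk20a_allocator_ops bump_ops = {
		.alloc = gk20a_bump_alloc,
		.free = gk20a_bump_free,
		/*
		 * base/length/end/inited/fini/print_stats elided here; a
		 * real backend must fill them in, since the generic
		 * wrappers call them unconditionally.
		 */
	};

An init function would kzalloc() the private struct once (sleeping is fine at
init time) and then hand it to __gk20a_alloc_common_init(a, name, priv,
&bump_ops), exactly as the buddy initializer in this patch does.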