Diffstat (limited to 'drivers/gpu/nvgpu/common/mm')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/bitmap_allocator.c          438
-rw-r--r--  drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h      87
-rw-r--r--  drivers/gpu/nvgpu/common/mm/buddy_allocator.c          1323
-rw-r--r--  drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h      222
-rw-r--r--  drivers/gpu/nvgpu/common/mm/comptags.c                   95
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c                      920
-rw-r--r--  drivers/gpu/nvgpu/common/mm/lockless_allocator.c        225
-rw-r--r--  drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h   127
-rw-r--r--  drivers/gpu/nvgpu/common/mm/mm.c                        450
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c           162
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_mem.c                 119
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c           1047
-rw-r--r--  drivers/gpu/nvgpu/common/mm/pd_cache.c                  444
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vidmem.c                    554
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vm.c                       1145
-rw-r--r--  drivers/gpu/nvgpu/common/mm/vm_area.c                   231
16 files changed, 7589 insertions, 0 deletions
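
The files added below all plug into the common nvgpu_allocator interface through per-backend ops tables. The following is a minimal caller-side sketch of that interface, assuming the generic wrappers nvgpu_alloc(), nvgpu_free() and nvgpu_alloc_destroy() from <nvgpu/allocator.h> (not part of this diff) dispatch to the ops registered by the *_allocator_init() functions introduced here:

	/* Hypothetical caller, not part of this change; assumes a valid g. */
	static int demo_alloc(struct gk20a *g)
	{
		struct nvgpu_allocator alloc;
		u64 addr;
		int err;

		/* Manage a 64 MiB range starting at 0x100000 in 4 KiB blocks. */
		err = nvgpu_bitmap_allocator_init(g, &alloc, "demo", 0x100000,
						  64ULL << 20, SZ_4K, 0);
		if (err)
			return err;

		addr = nvgpu_alloc(&alloc, SZ_64K);	/* 0 means failure */
		if (addr)
			nvgpu_free(&alloc, addr);

		nvgpu_alloc_destroy(&alloc);
		return 0;
	}
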
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
new file mode 100644
index 00000000..6bd654b8
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -0,0 +1,438 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/bitops.h>
24#include <nvgpu/allocator.h>
25#include <nvgpu/kmem.h>
26#include <nvgpu/bug.h>
27#include <nvgpu/barrier.h>
28
29#include "bitmap_allocator_priv.h"
30
31static u64 nvgpu_bitmap_alloc_length(struct nvgpu_allocator *a)
32{
33 struct nvgpu_bitmap_allocator *ba = a->priv;
34
35 return ba->length;
36}
37
38static u64 nvgpu_bitmap_alloc_base(struct nvgpu_allocator *a)
39{
40 struct nvgpu_bitmap_allocator *ba = a->priv;
41
42 return ba->base;
43}
44
45static int nvgpu_bitmap_alloc_inited(struct nvgpu_allocator *a)
46{
47 struct nvgpu_bitmap_allocator *ba = a->priv;
48 int inited = ba->inited;
49
50 nvgpu_smp_rmb();
51 return inited;
52}
53
54static u64 nvgpu_bitmap_alloc_end(struct nvgpu_allocator *a)
55{
56 struct nvgpu_bitmap_allocator *ba = a->priv;
57
58 return ba->base + ba->length;
59}
60
61/*
62 * @page_size is ignored.
63 */
64static u64 nvgpu_bitmap_alloc_fixed(struct nvgpu_allocator *__a,
65 u64 base, u64 len, u32 page_size)
66{
67 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
68 u64 blks, offs, ret;
69
70 /* Compute the bit offset and make sure it's aligned to a block. */
71 offs = base >> a->blk_shift;
72 if (offs * a->blk_size != base)
73 return 0;
74
75 offs -= a->bit_offs;
76
77 blks = len >> a->blk_shift;
78 if (blks * a->blk_size != len)
79 blks++;
80
81 alloc_lock(__a);
82
83 /* Check if the space requested is already occupied. */
84 ret = bitmap_find_next_zero_area(a->bitmap, a->num_bits, offs, blks, 0);
85 if (ret != offs)
86 goto fail;
87
88 bitmap_set(a->bitmap, offs, blks);
89
90 a->bytes_alloced += blks * a->blk_size;
91 a->nr_fixed_allocs++;
92 alloc_unlock(__a);
93
94 alloc_dbg(__a, "Alloc-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
95 base, len, blks, blks);
96 return base;
97
98fail:
99 alloc_unlock(__a);
100 alloc_dbg(__a, "Alloc-fixed failed! (0x%llx)\n", base);
101 return 0;
102}
103
104/*
105 * Two possibilities for this function: either we are freeing a fixed allocation
106 * or we are freeing a regular alloc but with GPU_ALLOC_NO_ALLOC_PAGE defined.
107 *
108 * Note: this function won't do much error checking. Thus you could really
109 * confuse the allocator if you misuse this function.
110 */
111static void nvgpu_bitmap_free_fixed(struct nvgpu_allocator *__a,
112 u64 base, u64 len)
113{
114 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
115 u64 blks, offs;
116
117 offs = base >> a->blk_shift;
118 if (WARN_ON(offs * a->blk_size != base))
119 return;
120
121 offs -= a->bit_offs;
122
123 blks = len >> a->blk_shift;
124 if (blks * a->blk_size != len)
125 blks++;
126
127 alloc_lock(__a);
128 bitmap_clear(a->bitmap, offs, blks);
129 a->bytes_freed += blks * a->blk_size;
130 alloc_unlock(__a);
131
132 alloc_dbg(__a, "Free-fixed 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
133 base, len, blks, blks);
134}
135
136/*
137 * Add the passed alloc to the tree of stored allocations.
138 */
139static void insert_alloc_metadata(struct nvgpu_bitmap_allocator *a,
140 struct nvgpu_bitmap_alloc *alloc)
141{
142 alloc->alloc_entry.key_start = alloc->base;
143 alloc->alloc_entry.key_end = alloc->base + alloc->length;
144
145 nvgpu_rbtree_insert(&alloc->alloc_entry, &a->allocs);
146}
147
148/*
149 * Find and remove meta-data from the outstanding allocations.
150 */
151static struct nvgpu_bitmap_alloc *find_alloc_metadata(
152 struct nvgpu_bitmap_allocator *a, u64 addr)
153{
154 struct nvgpu_bitmap_alloc *alloc;
155 struct nvgpu_rbtree_node *node = NULL;
156
157 nvgpu_rbtree_search(addr, &node, a->allocs);
158 if (!node)
159 return NULL;
160
161 alloc = nvgpu_bitmap_alloc_from_rbtree_node(node);
162
163 nvgpu_rbtree_unlink(node, &a->allocs);
164
165 return alloc;
166}
167
168/*
 169 * The tree of alloc metadata stores the address of the alloc, not the bit offset.
170 */
171static int __nvgpu_bitmap_store_alloc(struct nvgpu_bitmap_allocator *a,
172 u64 addr, u64 len)
173{
174 struct nvgpu_bitmap_alloc *alloc =
175 nvgpu_kmem_cache_alloc(a->meta_data_cache);
176
177 if (!alloc)
178 return -ENOMEM;
179
180 alloc->base = addr;
181 alloc->length = len;
182
183 insert_alloc_metadata(a, alloc);
184
185 return 0;
186}
187
188/*
189 * @len is in bytes. This routine will figure out the right number of bits to
190 * actually allocate. The return is the address in bytes as well.
191 */
192static u64 nvgpu_bitmap_alloc(struct nvgpu_allocator *__a, u64 len)
193{
194 u64 blks, addr;
195 unsigned long offs, adjusted_offs, limit;
196 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
197
198 blks = len >> a->blk_shift;
199
200 if (blks * a->blk_size != len)
201 blks++;
202
203 alloc_lock(__a);
204
205 /*
206 * First look from next_blk and onwards...
207 */
208 offs = bitmap_find_next_zero_area(a->bitmap, a->num_bits,
209 a->next_blk, blks, 0);
210 if (offs >= a->num_bits) {
211 /*
212 * If that didn't work try the remaining area. Since there can
213 * be available space that spans across a->next_blk we need to
214 * search up to the first set bit after that.
215 */
216 limit = find_next_bit(a->bitmap, a->num_bits, a->next_blk);
217 offs = bitmap_find_next_zero_area(a->bitmap, limit,
218 0, blks, 0);
219 if (offs >= a->next_blk)
220 goto fail;
221 }
222
223 bitmap_set(a->bitmap, offs, blks);
224 a->next_blk = offs + blks;
225
226 adjusted_offs = offs + a->bit_offs;
227 addr = ((u64)adjusted_offs) * a->blk_size;
228
229 /*
230 * Only do meta-data storage if we are allowed to allocate storage for
 231 * that meta-data. The issue with using malloc and friends is that in
 232 * latency- and success-critical paths an alloc_page() call can either
 233 * sleep for potentially a long time or fail. Since we might not want
234 * either of these possibilities assume that the caller will keep what
235 * data it needs around to successfully free this allocation.
236 */
237 if (!(a->flags & GPU_ALLOC_NO_ALLOC_PAGE) &&
238 __nvgpu_bitmap_store_alloc(a, addr, blks * a->blk_size))
239 goto fail_reset_bitmap;
240
241 alloc_dbg(__a, "Alloc 0x%-10llx 0x%-5llx [bits=0x%llx (%llu)]\n",
242 addr, len, blks, blks);
243
244 a->nr_allocs++;
245 a->bytes_alloced += (blks * a->blk_size);
246 alloc_unlock(__a);
247
248 return addr;
249
250fail_reset_bitmap:
251 bitmap_clear(a->bitmap, offs, blks);
252fail:
253 a->next_blk = 0;
254 alloc_unlock(__a);
255 alloc_dbg(__a, "Alloc failed!\n");
256 return 0;
257}
258
259static void nvgpu_bitmap_free(struct nvgpu_allocator *__a, u64 addr)
260{
261 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
262 struct nvgpu_bitmap_alloc *alloc = NULL;
263 u64 offs, adjusted_offs, blks;
264
265 alloc_lock(__a);
266
267 if (a->flags & GPU_ALLOC_NO_ALLOC_PAGE) {
268 WARN(1, "Using wrong free for NO_ALLOC_PAGE bitmap allocator");
269 goto done;
270 }
271
272 alloc = find_alloc_metadata(a, addr);
273 if (!alloc)
274 goto done;
275
276 /*
 277 * The address comes from the adjusted offset (i.e. the bit offset with
 278 * a->bit_offs added). So start with that and then work out the real
 279 * offset into the bitmap.
280 */
281 adjusted_offs = addr >> a->blk_shift;
282 offs = adjusted_offs - a->bit_offs;
283 blks = alloc->length >> a->blk_shift;
284
285 bitmap_clear(a->bitmap, offs, blks);
286 alloc_dbg(__a, "Free 0x%-10llx\n", addr);
287
288 a->bytes_freed += alloc->length;
289
290done:
291 if (a->meta_data_cache && alloc)
292 nvgpu_kmem_cache_free(a->meta_data_cache, alloc);
293 alloc_unlock(__a);
294}
295
296static void nvgpu_bitmap_alloc_destroy(struct nvgpu_allocator *__a)
297{
298 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
299 struct nvgpu_bitmap_alloc *alloc;
300 struct nvgpu_rbtree_node *node;
301
302 /*
303 * Kill any outstanding allocations.
304 */
305 nvgpu_rbtree_enum_start(0, &node, a->allocs);
306 while (node) {
307 alloc = nvgpu_bitmap_alloc_from_rbtree_node(node);
308
309 nvgpu_rbtree_unlink(node, &a->allocs);
310 nvgpu_kmem_cache_free(a->meta_data_cache, alloc);
311
312 nvgpu_rbtree_enum_start(0, &node, a->allocs);
313 }
314
315 nvgpu_kmem_cache_destroy(a->meta_data_cache);
316 nvgpu_kfree(nvgpu_alloc_to_gpu(__a), a->bitmap);
317 nvgpu_kfree(nvgpu_alloc_to_gpu(__a), a);
318}
319
320#ifdef __KERNEL__
321static void nvgpu_bitmap_print_stats(struct nvgpu_allocator *__a,
322 struct seq_file *s, int lock)
323{
324 struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
325
326 __alloc_pstat(s, __a, "Bitmap allocator params:\n");
327 __alloc_pstat(s, __a, " start = 0x%llx\n", a->base);
328 __alloc_pstat(s, __a, " end = 0x%llx\n", a->base + a->length);
329 __alloc_pstat(s, __a, " blks = 0x%llx\n", a->num_bits);
330
331 /* Actual stats. */
332 __alloc_pstat(s, __a, "Stats:\n");
333 __alloc_pstat(s, __a, " Number allocs = 0x%llx\n", a->nr_allocs);
334 __alloc_pstat(s, __a, " Number fixed = 0x%llx\n", a->nr_fixed_allocs);
335 __alloc_pstat(s, __a, " Bytes alloced = 0x%llx\n", a->bytes_alloced);
336 __alloc_pstat(s, __a, " Bytes freed = 0x%llx\n", a->bytes_freed);
337 __alloc_pstat(s, __a, " Outstanding = 0x%llx\n",
338 a->bytes_alloced - a->bytes_freed);
339}
340#endif
341
342static const struct nvgpu_allocator_ops bitmap_ops = {
343 .alloc = nvgpu_bitmap_alloc,
344 .free = nvgpu_bitmap_free,
345
346 .alloc_fixed = nvgpu_bitmap_alloc_fixed,
347 .free_fixed = nvgpu_bitmap_free_fixed,
348
349 .base = nvgpu_bitmap_alloc_base,
350 .length = nvgpu_bitmap_alloc_length,
351 .end = nvgpu_bitmap_alloc_end,
352 .inited = nvgpu_bitmap_alloc_inited,
353
354 .fini = nvgpu_bitmap_alloc_destroy,
355
356#ifdef __KERNEL__
357 .print_stats = nvgpu_bitmap_print_stats,
358#endif
359};
360
361
362int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
363 const char *name, u64 base, u64 length,
364 u64 blk_size, u64 flags)
365{
366 int err;
367 struct nvgpu_bitmap_allocator *a;
368
369 if (WARN_ON(blk_size & (blk_size - 1)))
370 return -EINVAL;
371
372 /*
 373 * blk_size must be a power of 2; base and length also need to be aligned
374 * to blk_size.
375 */
376 if (blk_size & (blk_size - 1) ||
377 base & (blk_size - 1) || length & (blk_size - 1))
378 return -EINVAL;
379
380 if (base == 0) {
381 base = blk_size;
382 length -= blk_size;
383 }
384
385 a = nvgpu_kzalloc(g, sizeof(struct nvgpu_bitmap_allocator));
386 if (!a)
387 return -ENOMEM;
388
389 err = __nvgpu_alloc_common_init(__a, g, name, a, false, &bitmap_ops);
390 if (err)
391 goto fail;
392
393 if (!(flags & GPU_ALLOC_NO_ALLOC_PAGE)) {
394 a->meta_data_cache = nvgpu_kmem_cache_create(g,
395 sizeof(struct nvgpu_bitmap_alloc));
396 if (!a->meta_data_cache) {
397 err = -ENOMEM;
398 goto fail;
399 }
400 }
401
402 a->base = base;
403 a->length = length;
404 a->blk_size = blk_size;
405 a->blk_shift = __ffs(a->blk_size);
406 a->num_bits = length >> a->blk_shift;
407 a->bit_offs = a->base >> a->blk_shift;
408 a->flags = flags;
409 a->allocs = NULL;
410
411 a->bitmap = nvgpu_kcalloc(g, BITS_TO_LONGS(a->num_bits),
412 sizeof(*a->bitmap));
413 if (!a->bitmap) {
414 err = -ENOMEM;
415 goto fail;
416 }
417
418 nvgpu_smp_wmb();
419 a->inited = true;
420
421#ifdef CONFIG_DEBUG_FS
422 nvgpu_init_alloc_debug(g, __a);
423#endif
424 alloc_dbg(__a, "New allocator: type bitmap\n");
425 alloc_dbg(__a, " base 0x%llx\n", a->base);
426 alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs);
427 alloc_dbg(__a, " size 0x%llx\n", a->length);
428 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
429 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
430
431 return 0;
432
433fail:
434 if (a->meta_data_cache)
435 nvgpu_kmem_cache_destroy(a->meta_data_cache);
436 nvgpu_kfree(g, a);
437 return err;
438}
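
The fixed-alloc paths above convert byte addresses to bit offsets with the blk_shift/bit_offs arithmetic and round partial blocks up. Below is a standalone, userspace sketch of that math with made-up numbers (4 KiB blocks, space base 0x100000, request base 0x104000, length 0x2800); nothing in it comes from the nvgpu headers:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t blk_size = 4096, blk_shift = 12;	/* 4 KiB blocks */
		uint64_t bit_offs = 0x100000 >> blk_shift;	/* space base */
		uint64_t base = 0x104000, len = 0x2800;		/* 10 KiB request */

		uint64_t offs = (base >> blk_shift) - bit_offs;	/* first bit: 4 */
		uint64_t blks = len >> blk_shift;		/* 2 whole blocks */
		if (blks * blk_size != len)
			blks++;					/* round up to 3 */

		/* Prints: offs=4 blks=3 bytes=12288 */
		printf("offs=%llu blks=%llu bytes=%llu\n",
		       (unsigned long long)offs, (unsigned long long)blks,
		       (unsigned long long)(blks * blk_size));
		return 0;
	}
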
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h
new file mode 100644
index 00000000..1750447d
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator_priv.h
@@ -0,0 +1,87 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#ifndef BITMAP_ALLOCATOR_PRIV_H
24#define BITMAP_ALLOCATOR_PRIV_H
25
26
27#include <nvgpu/rbtree.h>
28#include <nvgpu/kmem.h>
29
30struct nvgpu_allocator;
31
32struct nvgpu_bitmap_allocator {
33 struct nvgpu_allocator *owner;
34
35 u64 base; /* Base address of the space. */
36 u64 length; /* Length of the space. */
37 u64 blk_size; /* Size that corresponds to 1 bit. */
38 u64 blk_shift; /* Bit shift to divide by blk_size. */
39 u64 num_bits; /* Number of allocatable bits. */
40 u64 bit_offs; /* Offset of bitmap. */
41
42 /*
43 * Optimization for making repeated allocations faster. Keep track of
44 * the next bit after the most recent allocation. This is where the next
45 * search will start from. This should make allocation faster in cases
46 * where lots of allocations get made one after another. It shouldn't
47 * have a negative impact on the case where the allocator is fragmented.
48 */
49 u64 next_blk;
50
51 unsigned long *bitmap; /* The actual bitmap! */
52 struct nvgpu_rbtree_node *allocs; /* Tree of outstanding allocations */
53
54 struct nvgpu_kmem_cache *meta_data_cache;
55
56 u64 flags;
57
58 bool inited;
59
60 /* Statistics */
61 u64 nr_allocs;
62 u64 nr_fixed_allocs;
63 u64 bytes_alloced;
64 u64 bytes_freed;
65};
66
67struct nvgpu_bitmap_alloc {
68 u64 base;
69 u64 length;
70 struct nvgpu_rbtree_node alloc_entry; /* RB tree of allocations. */
71};
72
73static inline struct nvgpu_bitmap_alloc *
74nvgpu_bitmap_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node)
75{
76 return (struct nvgpu_bitmap_alloc *)
77 ((uintptr_t)node - offsetof(struct nvgpu_bitmap_alloc, alloc_entry));
78};
79
80static inline struct nvgpu_bitmap_allocator *bitmap_allocator(
81 struct nvgpu_allocator *a)
82{
83 return (struct nvgpu_bitmap_allocator *)(a)->priv;
84}
85
86
87#endif
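
The nvgpu_bitmap_alloc_from_rbtree_node() helper above is an open-coded container_of(): it recovers the embedding structure from a pointer to an embedded member. A self-contained sketch of the same idiom, with purely illustrative types:

	#include <stddef.h>
	#include <stdint.h>

	struct node { struct node *left, *right; };
	struct item { unsigned long key; struct node entry; };

	/* Subtract the member's offset from the member's address to get
	 * back to the start of the containing struct item. */
	static inline struct item *item_from_node(struct node *n)
	{
		return (struct item *)((uintptr_t)n - offsetof(struct item, entry));
	}
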
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
new file mode 100644
index 00000000..a2546e9d
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -0,0 +1,1323 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/allocator.h>
24#include <nvgpu/kmem.h>
25#include <nvgpu/bug.h>
26#include <nvgpu/log2.h>
27#include <nvgpu/barrier.h>
28#include <nvgpu/mm.h>
29#include <nvgpu/vm.h>
30
31#include "buddy_allocator_priv.h"
32
33/* Some other buddy allocator functions. */
34static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a,
35 u64 addr);
36static void balloc_coalesce(struct nvgpu_buddy_allocator *a,
37 struct nvgpu_buddy *b);
38static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a,
39 struct nvgpu_fixed_alloc *falloc);
40
41/*
42 * This function is not present in older kernel's list.h code.
43 */
44#ifndef list_last_entry
45#define list_last_entry(ptr, type, member) \
46 list_entry((ptr)->prev, type, member)
47#endif
48
49/*
50 * GPU buddy allocator for various address spaces.
51 *
52 * Current limitations:
53 * o A fixed allocation could potentially be made that borders PDEs with
54 * different PTE sizes. This would require that fixed buffer to have
55 * different sized PTEs for different parts of the allocation. Probably
56 * best to just require PDE alignment for fixed address allocs.
57 *
58 * o It is currently possible to make an allocator that has a buddy alignment
59 * out of sync with the PDE block size alignment. A simple example is a
60 * 32GB address space starting at byte 1. Every buddy is shifted off by 1
 61 * which means each buddy corresponds to more than one actual GPU page. The
 62 * best way to fix this is probably just to require PDE block size alignment
63 * for the start of the address space. At the moment all allocators are
64 * easily PDE aligned so this hasn't been a problem.
65 */
66
67/*
68 * Pick a suitable maximum order for this allocator.
69 *
 70 * Heuristic: just guess that the best max order is the largest single
71 * block that will fit in the address space.
72 */
73static void balloc_compute_max_order(struct nvgpu_buddy_allocator *a)
74{
75 u64 true_max_order = ilog2(a->blks);
76
77 if (a->max_order == 0) {
78 a->max_order = true_max_order;
79 return;
80 }
81
82 if (a->max_order > true_max_order)
83 a->max_order = true_max_order;
84 if (a->max_order > GPU_BALLOC_MAX_ORDER)
85 a->max_order = GPU_BALLOC_MAX_ORDER;
86}
87
88/*
 89 * Since we can only allocate in chunks of a->blk_size we need to trim off
90 * any excess data that is not aligned to a->blk_size.
91 */
92static void balloc_allocator_align(struct nvgpu_buddy_allocator *a)
93{
94 a->start = ALIGN(a->base, a->blk_size);
95 WARN_ON(a->start != a->base);
96 a->end = (a->base + a->length) & ~(a->blk_size - 1);
97 a->count = a->end - a->start;
98 a->blks = a->count >> a->blk_shift;
99}
100
101/*
102 * Pass NULL for parent if you want a top level buddy.
103 */
104static struct nvgpu_buddy *balloc_new_buddy(struct nvgpu_buddy_allocator *a,
105 struct nvgpu_buddy *parent,
106 u64 start, u64 order)
107{
108 struct nvgpu_buddy *new_buddy;
109
110 new_buddy = nvgpu_kmem_cache_alloc(a->buddy_cache);
111 if (!new_buddy)
112 return NULL;
113
114 memset(new_buddy, 0, sizeof(struct nvgpu_buddy));
115
116 new_buddy->parent = parent;
117 new_buddy->start = start;
118 new_buddy->order = order;
119 new_buddy->end = start + (1 << order) * a->blk_size;
120 new_buddy->pte_size = BALLOC_PTE_SIZE_ANY;
121
122 return new_buddy;
123}
124
125static void __balloc_buddy_list_add(struct nvgpu_buddy_allocator *a,
126 struct nvgpu_buddy *b,
127 struct nvgpu_list_node *list)
128{
129 if (buddy_is_in_list(b)) {
130 alloc_dbg(balloc_owner(a),
131 "Oops: adding added buddy (%llu:0x%llx)\n",
132 b->order, b->start);
133 BUG();
134 }
135
136 /*
137 * Add big PTE blocks to the tail, small to the head for GVA spaces.
 138 * This lets the code that checks for available blocks do so without
 139 * cycling through the entire list.
140 */
141 if (a->flags & GPU_ALLOC_GVA_SPACE &&
142 b->pte_size == gmmu_page_size_big)
143 nvgpu_list_add_tail(&b->buddy_entry, list);
144 else
145 nvgpu_list_add(&b->buddy_entry, list);
146
147 buddy_set_in_list(b);
148}
149
150static void __balloc_buddy_list_rem(struct nvgpu_buddy_allocator *a,
151 struct nvgpu_buddy *b)
152{
153 if (!buddy_is_in_list(b)) {
154 alloc_dbg(balloc_owner(a),
155 "Oops: removing removed buddy (%llu:0x%llx)\n",
156 b->order, b->start);
157 BUG();
158 }
159
160 nvgpu_list_del(&b->buddy_entry);
161 buddy_clr_in_list(b);
162}
163
164/*
165 * Add a buddy to one of the buddy lists and deal with the necessary
166 * book keeping. Adds the buddy to the list specified by the buddy's order.
167 */
168static void balloc_blist_add(struct nvgpu_buddy_allocator *a,
169 struct nvgpu_buddy *b)
170{
171 __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
172 a->buddy_list_len[b->order]++;
173}
174
175static void balloc_blist_rem(struct nvgpu_buddy_allocator *a,
176 struct nvgpu_buddy *b)
177{
178 __balloc_buddy_list_rem(a, b);
179 a->buddy_list_len[b->order]--;
180}
181
182static u64 balloc_get_order(struct nvgpu_buddy_allocator *a, u64 len)
183{
184 if (len == 0)
185 return 0;
186
187 len--;
188 len >>= a->blk_shift;
189
190 return fls(len);
191}
192
193static u64 __balloc_max_order_in(struct nvgpu_buddy_allocator *a,
194 u64 start, u64 end)
195{
196 u64 size = (end - start) >> a->blk_shift;
197
198 if (size > 0)
199 return min_t(u64, ilog2(size), a->max_order);
200 else
201 return GPU_BALLOC_MAX_ORDER;
202}
203
204/*
205 * Initialize the buddy lists.
206 */
207static int balloc_init_lists(struct nvgpu_buddy_allocator *a)
208{
209 int i;
210 u64 bstart, bend, order;
211 struct nvgpu_buddy *buddy;
212
213 bstart = a->start;
214 bend = a->end;
215
216 /* First make sure the LLs are valid. */
217 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
218 nvgpu_init_list_node(balloc_get_order_list(a, i));
219
220 while (bstart < bend) {
221 order = __balloc_max_order_in(a, bstart, bend);
222
223 buddy = balloc_new_buddy(a, NULL, bstart, order);
224 if (!buddy)
225 goto cleanup;
226
227 balloc_blist_add(a, buddy);
228 bstart += balloc_order_to_len(a, order);
229 }
230
231 return 0;
232
233cleanup:
234 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
235 if (!nvgpu_list_empty(balloc_get_order_list(a, i))) {
236 buddy = nvgpu_list_first_entry(
237 balloc_get_order_list(a, i),
238 nvgpu_buddy, buddy_entry);
239 balloc_blist_rem(a, buddy);
240 nvgpu_kmem_cache_free(a->buddy_cache, buddy);
241 }
242 }
243
244 return -ENOMEM;
245}
246
247/*
248 * Clean up and destroy the passed allocator.
249 */
250static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *__a)
251{
252 int i;
253 struct nvgpu_rbtree_node *node = NULL;
254 struct nvgpu_buddy *bud;
255 struct nvgpu_fixed_alloc *falloc;
256 struct nvgpu_buddy_allocator *a = __a->priv;
257
258 alloc_lock(__a);
259
260#ifdef CONFIG_DEBUG_FS
261 nvgpu_fini_alloc_debug(__a);
262#endif
263
264 /*
265 * Free the fixed allocs first.
266 */
267 nvgpu_rbtree_enum_start(0, &node, a->fixed_allocs);
268 while (node) {
269 falloc = nvgpu_fixed_alloc_from_rbtree_node(node);
270
271 nvgpu_rbtree_unlink(node, &a->fixed_allocs);
272 __balloc_do_free_fixed(a, falloc);
273
274 nvgpu_rbtree_enum_start(0, &node, a->fixed_allocs);
275 }
276
277 /*
278 * And now free all outstanding allocations.
279 */
280 nvgpu_rbtree_enum_start(0, &node, a->alloced_buddies);
281 while (node) {
282 bud = nvgpu_buddy_from_rbtree_node(node);
283
284 balloc_free_buddy(a, bud->start);
285 balloc_blist_add(a, bud);
286 balloc_coalesce(a, bud);
287
288 nvgpu_rbtree_enum_start(0, &node, a->alloced_buddies);
289 }
290
291 /*
292 * Now clean up the unallocated buddies.
293 */
294 for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
295 BUG_ON(a->buddy_list_alloced[i] != 0);
296
297 while (!nvgpu_list_empty(balloc_get_order_list(a, i))) {
298 bud = nvgpu_list_first_entry(
299 balloc_get_order_list(a, i),
300 nvgpu_buddy, buddy_entry);
301 balloc_blist_rem(a, bud);
302 nvgpu_kmem_cache_free(a->buddy_cache, bud);
303 }
304
305 if (a->buddy_list_len[i] != 0) {
306 nvgpu_info(__a->g,
307 "Excess buddies!!! (%d: %llu)\n",
308 i, a->buddy_list_len[i]);
309 BUG();
310 }
311 if (a->buddy_list_split[i] != 0) {
312 nvgpu_info(__a->g,
313 "Excess split nodes!!! (%d: %llu)\n",
314 i, a->buddy_list_split[i]);
315 BUG();
316 }
317 if (a->buddy_list_alloced[i] != 0) {
318 nvgpu_info(__a->g,
319 "Excess alloced nodes!!! (%d: %llu)\n",
320 i, a->buddy_list_alloced[i]);
321 BUG();
322 }
323 }
324
325 nvgpu_kmem_cache_destroy(a->buddy_cache);
326 nvgpu_kfree(nvgpu_alloc_to_gpu(__a), a);
327
328 alloc_unlock(__a);
329}
330
331/*
332 * Combine the passed buddy if possible. The pointer in @b may not be valid
333 * after this as the buddy may be freed.
334 *
335 * @a must be locked.
336 */
337static void balloc_coalesce(struct nvgpu_buddy_allocator *a,
338 struct nvgpu_buddy *b)
339{
340 struct nvgpu_buddy *parent;
341
342 if (buddy_is_alloced(b) || buddy_is_split(b))
343 return;
344
345 /*
 346 * If neither this buddy nor its buddy is allocated or split then we
 347 * can coalesce them into their parent.
348 */
349 if (!b->buddy)
350 return;
351 if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
352 return;
353
354 parent = b->parent;
355
356 balloc_blist_rem(a, b);
357 balloc_blist_rem(a, b->buddy);
358
359 buddy_clr_split(parent);
360 a->buddy_list_split[parent->order]--;
361 balloc_blist_add(a, parent);
362
363 /*
364 * Recursively coalesce as far as we can go.
365 */
366 balloc_coalesce(a, parent);
367
368 /* Clean up the remains. */
369 nvgpu_kmem_cache_free(a->buddy_cache, b->buddy);
370 nvgpu_kmem_cache_free(a->buddy_cache, b);
371}
372
373/*
374 * Split a buddy into two new buddies who are 1/2 the size of the parent buddy.
375 *
376 * @a must be locked.
377 */
378static int balloc_split_buddy(struct nvgpu_buddy_allocator *a,
379 struct nvgpu_buddy *b, int pte_size)
380{
381 struct nvgpu_buddy *left, *right;
382 u64 half;
383
384 left = balloc_new_buddy(a, b, b->start, b->order - 1);
385 if (!left)
386 return -ENOMEM;
387
388 half = (b->end - b->start) / 2;
389
390 right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
391 if (!right) {
392 nvgpu_kmem_cache_free(a->buddy_cache, left);
393 return -ENOMEM;
394 }
395
396 buddy_set_split(b);
397 a->buddy_list_split[b->order]++;
398
399 b->left = left;
400 b->right = right;
401 left->buddy = right;
402 right->buddy = left;
403 left->parent = b;
404 right->parent = b;
405
406 /* PTE considerations. */
407 if (a->flags & GPU_ALLOC_GVA_SPACE &&
408 left->order <= a->pte_blk_order) {
409 left->pte_size = pte_size;
410 right->pte_size = pte_size;
411 }
412
413 balloc_blist_rem(a, b);
414 balloc_blist_add(a, left);
415 balloc_blist_add(a, right);
416
417 return 0;
418}
419
420/*
421 * Place the passed buddy into the RB tree for allocated buddies. Never fails
422 * unless the passed entry is a duplicate which is a bug.
423 *
424 * @a must be locked.
425 */
426static void balloc_alloc_buddy(struct nvgpu_buddy_allocator *a,
427 struct nvgpu_buddy *b)
428{
429 b->alloced_entry.key_start = b->start;
430 b->alloced_entry.key_end = b->end;
431
432 nvgpu_rbtree_insert(&b->alloced_entry, &a->alloced_buddies);
433
434 buddy_set_alloced(b);
435 a->buddy_list_alloced[b->order]++;
436}
437
438/*
439 * Remove the passed buddy from the allocated buddy RB tree. Returns the
440 * deallocated buddy for further processing.
441 *
442 * @a must be locked.
443 */
444static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a,
445 u64 addr)
446{
447 struct nvgpu_rbtree_node *node = NULL;
448 struct nvgpu_buddy *bud;
449
450 nvgpu_rbtree_search(addr, &node, a->alloced_buddies);
451 if (!node)
452 return NULL;
453
454 bud = nvgpu_buddy_from_rbtree_node(node);
455
456 nvgpu_rbtree_unlink(node, &a->alloced_buddies);
457 buddy_clr_alloced(bud);
458 a->buddy_list_alloced[bud->order]--;
459
460 return bud;
461}
462
463/*
464 * Find a suitable buddy for the given order and PTE type (big or little).
465 */
466static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a,
467 u64 order, int pte_size)
468{
469 struct nvgpu_buddy *bud;
470
471 if (order > a->max_order ||
472 nvgpu_list_empty(balloc_get_order_list(a, order)))
473 return NULL;
474
475 if (a->flags & GPU_ALLOC_GVA_SPACE &&
476 pte_size == gmmu_page_size_big)
477 bud = nvgpu_list_last_entry(balloc_get_order_list(a, order),
478 nvgpu_buddy, buddy_entry);
479 else
480 bud = nvgpu_list_first_entry(balloc_get_order_list(a, order),
481 nvgpu_buddy, buddy_entry);
482
483 if (pte_size != BALLOC_PTE_SIZE_ANY &&
484 pte_size != bud->pte_size &&
485 bud->pte_size != BALLOC_PTE_SIZE_ANY)
486 return NULL;
487
488 return bud;
489}
490
491/*
492 * Allocate a suitably sized buddy. If no suitable buddy exists split higher
493 * order buddies until we have a suitable buddy to allocate.
494 *
495 * For PDE grouping add an extra check to see if a buddy is suitable: that the
 496 * buddy exists in a PDE whose PTE size is reasonable.
497 *
498 * @a must be locked.
499 */
500static u64 __balloc_do_alloc(struct nvgpu_buddy_allocator *a,
501 u64 order, int pte_size)
502{
503 u64 split_order;
504 struct nvgpu_buddy *bud = NULL;
505
506 split_order = order;
507 while (split_order <= a->max_order &&
508 !(bud = __balloc_find_buddy(a, split_order, pte_size)))
509 split_order++;
510
511 /* Out of memory! */
512 if (!bud)
513 return 0;
514
515 while (bud->order != order) {
516 if (balloc_split_buddy(a, bud, pte_size))
517 return 0; /* No mem... */
518 bud = bud->left;
519 }
520
521 balloc_blist_rem(a, bud);
522 balloc_alloc_buddy(a, bud);
523
524 return bud->start;
525}
526
527/*
528 * See if the passed range is actually available for allocation. If so, then
529 * return 1, otherwise return 0.
530 *
 531 * TODO: Right now this uses the suboptimal approach of going through all
532 * outstanding allocations and checking their base/ends. This could be better.
533 */
534static int balloc_is_range_free(struct nvgpu_buddy_allocator *a,
535 u64 base, u64 end)
536{
537 struct nvgpu_rbtree_node *node = NULL;
538 struct nvgpu_buddy *bud;
539
540 nvgpu_rbtree_enum_start(0, &node, a->alloced_buddies);
541 if (!node)
542 return 1; /* No allocs yet. */
543
544 bud = nvgpu_buddy_from_rbtree_node(node);
545
546 while (bud->start < end) {
547 if ((bud->start > base && bud->start < end) ||
548 (bud->end > base && bud->end < end))
549 return 0;
550
551 nvgpu_rbtree_enum_next(&node, node);
552 if (!node)
553 break;
554 bud = nvgpu_buddy_from_rbtree_node(node);
555 }
556
557 return 1;
558}
559
560static void balloc_alloc_fixed(struct nvgpu_buddy_allocator *a,
561 struct nvgpu_fixed_alloc *f)
562{
563 f->alloced_entry.key_start = f->start;
564 f->alloced_entry.key_end = f->end;
565
566 nvgpu_rbtree_insert(&f->alloced_entry, &a->fixed_allocs);
567}
568
569/*
 570 * Remove the fixed allocation containing @addr from the fixed-alloc RB
 571 * tree. Returns the removed fixed alloc for further processing.
572 *
573 * @a must be locked.
574 */
575static struct nvgpu_fixed_alloc *balloc_free_fixed(
576 struct nvgpu_buddy_allocator *a, u64 addr)
577{
578 struct nvgpu_fixed_alloc *falloc;
579 struct nvgpu_rbtree_node *node = NULL;
580
581 nvgpu_rbtree_search(addr, &node, a->fixed_allocs);
582 if (!node)
583 return NULL;
584
585 falloc = nvgpu_fixed_alloc_from_rbtree_node(node);
586
587 nvgpu_rbtree_unlink(node, &a->fixed_allocs);
588
589 return falloc;
590}
591
592/*
593 * Find the parent range - doesn't necessarily need the parent to actually exist
594 * as a buddy. Finding an existing parent comes later...
595 */
596static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a,
597 u64 base, u64 order,
598 u64 *pbase, u64 *porder)
599{
600 u64 base_mask;
601 u64 shifted_base = balloc_base_shift(a, base);
602
603 order++;
604 base_mask = ~((a->blk_size << order) - 1);
605
606 shifted_base &= base_mask;
607
608 *pbase = balloc_base_unshift(a, shifted_base);
609 *porder = order;
610}
611
612/*
613 * Makes a buddy at the passed address. This will make all parent buddies
614 * necessary for this buddy to exist as well.
615 */
616static struct nvgpu_buddy *__balloc_make_fixed_buddy(
617 struct nvgpu_buddy_allocator *a, u64 base, u64 order, int pte_size)
618{
619 struct nvgpu_buddy *bud = NULL;
620 struct nvgpu_list_node *order_list;
621 u64 cur_order = order, cur_base = base;
622
623 /*
624 * Algo:
625 * 1. Keep jumping up a buddy order until we find the real buddy that
626 * this buddy exists in.
627 * 2. Then work our way down through the buddy tree until we hit a dead
628 * end.
629 * 3. Start splitting buddies until we split to the one we need to
630 * make.
631 */
632 while (cur_order <= a->max_order) {
633 int found = 0;
634
635 order_list = balloc_get_order_list(a, cur_order);
636 nvgpu_list_for_each_entry(bud, order_list,
637 nvgpu_buddy, buddy_entry) {
638 if (bud->start == cur_base) {
639 /*
640 * Make sure page size matches if it's smaller
641 * than a PDE sized buddy.
642 */
643 if (bud->order <= a->pte_blk_order &&
644 bud->pte_size != BALLOC_PTE_SIZE_ANY &&
645 bud->pte_size != pte_size) {
646 /* Welp, that's the end of that. */
647 alloc_dbg(balloc_owner(a),
648 "Fixed buddy PTE "
649 "size mismatch!\n");
650 return NULL;
651 }
652
653 found = 1;
654 break;
655 }
656 }
657
658 if (found)
659 break;
660
661 __balloc_get_parent_range(a, cur_base, cur_order,
662 &cur_base, &cur_order);
663 }
664
665 if (cur_order > a->max_order) {
666 alloc_dbg(balloc_owner(a), "No buddy for range ???\n");
667 return NULL;
668 }
669
670 /* Split this buddy as necessary until we get the target buddy. */
671 while (bud->start != base || bud->order != order) {
672 if (balloc_split_buddy(a, bud, pte_size)) {
673 alloc_dbg(balloc_owner(a),
674 "split buddy failed? {0x%llx, %llu}\n",
675 bud->start, bud->order);
676 balloc_coalesce(a, bud);
677 return NULL;
678 }
679
680 if (base < bud->right->start)
681 bud = bud->left;
682 else
683 bud = bud->right;
684
685 }
686
687 return bud;
688}
689
690static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a,
691 struct nvgpu_fixed_alloc *falloc,
692 u64 base, u64 len, int pte_size)
693{
694 u64 shifted_base, inc_base;
695 u64 align_order;
696
697 shifted_base = balloc_base_shift(a, base);
698 if (shifted_base == 0)
699 align_order = __fls(len >> a->blk_shift);
700 else
701 align_order = min_t(u64,
702 __ffs(shifted_base >> a->blk_shift),
703 __fls(len >> a->blk_shift));
704
705 if (align_order > a->max_order) {
706 alloc_dbg(balloc_owner(a),
707 "Align order too big: %llu > %llu\n",
708 align_order, a->max_order);
709 return 0;
710 }
711
712 /*
713 * Generate a list of buddies that satisfy this allocation.
714 */
715 inc_base = shifted_base;
716 while (inc_base < (shifted_base + len)) {
717 u64 order_len = balloc_order_to_len(a, align_order);
718 u64 remaining;
719 struct nvgpu_buddy *bud;
720
721 bud = __balloc_make_fixed_buddy(a,
722 balloc_base_unshift(a, inc_base),
723 align_order, pte_size);
724 if (!bud) {
725 alloc_dbg(balloc_owner(a),
726 "Fixed buddy failed: {0x%llx, %llu}!\n",
727 balloc_base_unshift(a, inc_base),
728 align_order);
729 goto err_and_cleanup;
730 }
731
732 balloc_blist_rem(a, bud);
733 balloc_alloc_buddy(a, bud);
734 __balloc_buddy_list_add(a, bud, &falloc->buddies);
735
736 /* Book keeping. */
737 inc_base += order_len;
738 remaining = (shifted_base + len) - inc_base;
739 align_order = __ffs(inc_base >> a->blk_shift);
740
741 /* If we don't have much left - trim down align_order. */
742 if (balloc_order_to_len(a, align_order) > remaining)
743 align_order = __balloc_max_order_in(a, inc_base,
744 inc_base + remaining);
745 }
746
747 return base;
748
749err_and_cleanup:
750 while (!nvgpu_list_empty(&falloc->buddies)) {
751 struct nvgpu_buddy *bud = nvgpu_list_first_entry(
752 &falloc->buddies,
753 nvgpu_buddy, buddy_entry);
754
755 __balloc_buddy_list_rem(a, bud);
756 balloc_free_buddy(a, bud->start);
757 nvgpu_kmem_cache_free(a->buddy_cache, bud);
758 }
759
760 return 0;
761}
762
763static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a,
764 struct nvgpu_fixed_alloc *falloc)
765{
766 struct nvgpu_buddy *bud;
767
768 while (!nvgpu_list_empty(&falloc->buddies)) {
769 bud = nvgpu_list_first_entry(&falloc->buddies,
770 nvgpu_buddy,
771 buddy_entry);
772 __balloc_buddy_list_rem(a, bud);
773
774 balloc_free_buddy(a, bud->start);
775 balloc_blist_add(a, bud);
776 a->bytes_freed += balloc_order_to_len(a, bud->order);
777
778 /*
 779 * Attempt to defragment the allocation.
780 */
781 balloc_coalesce(a, bud);
782 }
783
784 nvgpu_kfree(nvgpu_alloc_to_gpu(a->owner), falloc);
785}
786
787/*
788 * Allocate memory from the passed allocator.
789 */
790static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len)
791{
792 u64 order, addr;
793 int pte_size;
794 struct nvgpu_buddy_allocator *a = __a->priv;
795
796 alloc_lock(__a);
797
798 order = balloc_get_order(a, len);
799
800 if (order > a->max_order) {
801 alloc_unlock(__a);
802 alloc_dbg(balloc_owner(a), "Alloc fail\n");
803 return 0;
804 }
805
806 if (a->flags & GPU_ALLOC_GVA_SPACE)
807 pte_size = __get_pte_size(a->vm, 0, len);
808 else
809 pte_size = BALLOC_PTE_SIZE_ANY;
810
811 addr = __balloc_do_alloc(a, order, pte_size);
812
813 if (addr) {
814 a->bytes_alloced += len;
815 a->bytes_alloced_real += balloc_order_to_len(a, order);
816 alloc_dbg(balloc_owner(a),
817 "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
818 addr, order, len,
819 pte_size == gmmu_page_size_big ? "big" :
820 pte_size == gmmu_page_size_small ? "small" :
821 "NA/any");
822 } else {
823 alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n");
824 }
825
826 a->alloc_made = 1;
827
828 alloc_unlock(__a);
829
830 return addr;
831}
832
833/*
834 * Requires @__a to be locked.
835 */
836static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
837 u64 base, u64 len, u32 page_size)
838{
839 int pte_size = BALLOC_PTE_SIZE_ANY;
840 u64 ret, real_bytes = 0;
841 struct nvgpu_buddy *bud;
842 struct nvgpu_fixed_alloc *falloc = NULL;
843 struct nvgpu_buddy_allocator *a = __a->priv;
844
845 /* If base isn't aligned to an order 0 block, fail. */
846 if (base & (a->blk_size - 1))
847 goto fail;
848
849 if (len == 0)
850 goto fail;
851
852 /* Check that the page size is valid. */
853 if (a->flags & GPU_ALLOC_GVA_SPACE && a->vm->big_pages) {
854 if (page_size == a->vm->big_page_size)
855 pte_size = gmmu_page_size_big;
856 else if (page_size == SZ_4K)
857 pte_size = gmmu_page_size_small;
858 else
859 goto fail;
860 }
861
862 falloc = nvgpu_kmalloc(nvgpu_alloc_to_gpu(__a), sizeof(*falloc));
863 if (!falloc)
864 goto fail;
865
866 nvgpu_init_list_node(&falloc->buddies);
867 falloc->start = base;
868 falloc->end = base + len;
869
870 if (!balloc_is_range_free(a, base, base + len)) {
871 alloc_dbg(balloc_owner(a),
872 "Range not free: 0x%llx -> 0x%llx\n",
873 base, base + len);
874 goto fail_unlock;
875 }
876
877 ret = __balloc_do_alloc_fixed(a, falloc, base, len, pte_size);
878 if (!ret) {
879 alloc_dbg(balloc_owner(a),
880 "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
881 base, base + len);
882 goto fail_unlock;
883 }
884
885 balloc_alloc_fixed(a, falloc);
886
887 nvgpu_list_for_each_entry(bud, &falloc->buddies,
888 nvgpu_buddy, buddy_entry)
889 real_bytes += (bud->end - bud->start);
890
891 a->bytes_alloced += len;
892 a->bytes_alloced_real += real_bytes;
893
894 alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base);
895
896 return base;
897
898fail_unlock:
899 alloc_unlock(__a);
900fail:
901 nvgpu_kfree(nvgpu_alloc_to_gpu(__a), falloc);
902 return 0;
903}
904
905/*
906 * Allocate a fixed address allocation. The address of the allocation is @base
907 * and the length is @len. This is not a typical buddy allocator operation and
 908 * as such has a high possibility of failure if the address space is heavily in
909 * use.
910 *
911 * Please do not use this function unless _absolutely_ necessary.
912 */
913static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
914 u64 base, u64 len, u32 page_size)
915{
916 u64 alloc;
917 struct nvgpu_buddy_allocator *a = __a->priv;
918
919 alloc_lock(__a);
920 alloc = __nvgpu_balloc_fixed_buddy(__a, base, len, page_size);
921 a->alloc_made = 1;
922 alloc_unlock(__a);
923
924 return alloc;
925}
926
927/*
928 * Free the passed allocation.
929 */
930static void nvgpu_buddy_bfree(struct nvgpu_allocator *__a, u64 addr)
931{
932 struct nvgpu_buddy *bud;
933 struct nvgpu_fixed_alloc *falloc;
934 struct nvgpu_buddy_allocator *a = __a->priv;
935
936 if (!addr)
937 return;
938
939 alloc_lock(__a);
940
941 /*
942 * First see if this is a fixed alloc. If not fall back to a regular
943 * buddy.
944 */
945 falloc = balloc_free_fixed(a, addr);
946 if (falloc) {
947 __balloc_do_free_fixed(a, falloc);
948 goto done;
949 }
950
951 bud = balloc_free_buddy(a, addr);
952 if (!bud)
953 goto done;
954
955 balloc_blist_add(a, bud);
956 a->bytes_freed += balloc_order_to_len(a, bud->order);
957
958 /*
 959 * Attempt to defragment the allocation.
960 */
961 balloc_coalesce(a, bud);
962
963done:
964 alloc_unlock(__a);
965 alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr);
966 return;
967}
968
969static bool nvgpu_buddy_reserve_is_possible(struct nvgpu_buddy_allocator *a,
970 struct nvgpu_alloc_carveout *co)
971{
972 struct nvgpu_alloc_carveout *tmp;
973 u64 co_base, co_end;
974
975 co_base = co->base;
976 co_end = co->base + co->length;
977
978 /*
979 * Not the fastest approach but we should not have that many carveouts
980 * for any reasonable allocator.
981 */
982 nvgpu_list_for_each_entry(tmp, &a->co_list,
983 nvgpu_alloc_carveout, co_entry) {
984 if ((co_base >= tmp->base &&
985 co_base < (tmp->base + tmp->length)) ||
986 (co_end >= tmp->base &&
987 co_end < (tmp->base + tmp->length)))
988 return false;
989 }
990
991 return true;
992}
993
994/*
995 * Carveouts can only be reserved before any regular allocations have been
996 * made.
997 */
998static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *__a,
999 struct nvgpu_alloc_carveout *co)
1000{
1001 struct nvgpu_buddy_allocator *a = __a->priv;
1002 u64 addr;
1003 int err = 0;
1004
1005 if (co->base < a->start || (co->base + co->length) > a->end ||
1006 a->alloc_made)
1007 return -EINVAL;
1008
1009 alloc_lock(__a);
1010
1011 if (!nvgpu_buddy_reserve_is_possible(a, co)) {
1012 err = -EBUSY;
1013 goto done;
1014 }
1015
1016 /* Should not be possible to fail... */
1017 addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length, 0);
1018 if (!addr) {
1019 err = -ENOMEM;
1020 nvgpu_warn(__a->g,
1021 "%s: Failed to reserve a valid carveout!\n",
1022 __func__);
1023 goto done;
1024 }
1025
1026 nvgpu_list_add(&co->co_entry, &a->co_list);
1027
1028done:
1029 alloc_unlock(__a);
1030 return err;
1031}
1032
1033/*
 1034 * Carveouts can be released at any time.
1035 */
1036static void nvgpu_buddy_release_co(struct nvgpu_allocator *__a,
1037 struct nvgpu_alloc_carveout *co)
1038{
1039 alloc_lock(__a);
1040
1041 nvgpu_list_del(&co->co_entry);
1042 nvgpu_free(__a, co->base);
1043
1044 alloc_unlock(__a);
1045}
1046
1047static u64 nvgpu_buddy_alloc_length(struct nvgpu_allocator *a)
1048{
1049 struct nvgpu_buddy_allocator *ba = a->priv;
1050
1051 return ba->length;
1052}
1053
1054static u64 nvgpu_buddy_alloc_base(struct nvgpu_allocator *a)
1055{
1056 struct nvgpu_buddy_allocator *ba = a->priv;
1057
1058 return ba->start;
1059}
1060
1061static int nvgpu_buddy_alloc_inited(struct nvgpu_allocator *a)
1062{
1063 struct nvgpu_buddy_allocator *ba = a->priv;
1064 int inited = ba->initialized;
1065
1066 nvgpu_smp_rmb();
1067 return inited;
1068}
1069
1070static u64 nvgpu_buddy_alloc_end(struct nvgpu_allocator *a)
1071{
1072 struct nvgpu_buddy_allocator *ba = a->priv;
1073
1074 return ba->end;
1075}
1076
1077static u64 nvgpu_buddy_alloc_space(struct nvgpu_allocator *a)
1078{
1079 struct nvgpu_buddy_allocator *ba = a->priv;
1080 u64 space;
1081
1082 alloc_lock(a);
1083 space = ba->end - ba->start -
1084 (ba->bytes_alloced_real - ba->bytes_freed);
1085 alloc_unlock(a);
1086
1087 return space;
1088}
1089
1090#ifdef __KERNEL__
1091/*
1092 * Print the buddy allocator top level stats. If you pass @s as NULL then the
1093 * stats are printed to the kernel log. This lets this code be used for
1094 * debugging purposes internal to the allocator.
1095 */
1096static void nvgpu_buddy_print_stats(struct nvgpu_allocator *__a,
1097 struct seq_file *s, int lock)
1098{
1099 int i = 0;
1100 struct nvgpu_rbtree_node *node = NULL;
1101 struct nvgpu_fixed_alloc *falloc;
1102 struct nvgpu_alloc_carveout *tmp;
1103 struct nvgpu_buddy_allocator *a = __a->priv;
1104
1105 __alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n",
1106 a->base, a->length, a->blk_size);
1107 __alloc_pstat(s, __a, "Internal params:\n");
1108 __alloc_pstat(s, __a, " start = 0x%llx\n", a->start);
1109 __alloc_pstat(s, __a, " end = 0x%llx\n", a->end);
1110 __alloc_pstat(s, __a, " count = 0x%llx\n", a->count);
1111 __alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks);
1112 __alloc_pstat(s, __a, " max_order = %llu\n", a->max_order);
1113
1114 if (lock)
1115 alloc_lock(__a);
1116
1117 if (!nvgpu_list_empty(&a->co_list)) {
1118 __alloc_pstat(s, __a, "\n");
1119 __alloc_pstat(s, __a, "Carveouts:\n");
1120 nvgpu_list_for_each_entry(tmp, &a->co_list,
1121 nvgpu_alloc_carveout, co_entry)
1122 __alloc_pstat(s, __a,
1123 " CO %2d: %-20s 0x%010llx + 0x%llx\n",
1124 i++, tmp->name, tmp->base, tmp->length);
1125 }
1126
1127 __alloc_pstat(s, __a, "\n");
1128 __alloc_pstat(s, __a, "Buddy blocks:\n");
1129 __alloc_pstat(s, __a, " Order Free Alloced Split\n");
1130 __alloc_pstat(s, __a, " ----- ---- ------- -----\n");
1131
1132 for (i = a->max_order; i >= 0; i--) {
1133 if (a->buddy_list_len[i] == 0 &&
1134 a->buddy_list_alloced[i] == 0 &&
1135 a->buddy_list_split[i] == 0)
1136 continue;
1137
1138 __alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i,
1139 a->buddy_list_len[i],
1140 a->buddy_list_alloced[i],
1141 a->buddy_list_split[i]);
1142 }
1143
1144 __alloc_pstat(s, __a, "\n");
1145
1146 nvgpu_rbtree_enum_start(0, &node, a->fixed_allocs);
1147 i = 1;
1148 while (node) {
1149 falloc = nvgpu_fixed_alloc_from_rbtree_node(node);
1150
1151 __alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
1152 i, falloc->start, falloc->end);
1153
1154 nvgpu_rbtree_enum_next(&node, a->fixed_allocs);
1155 }
1156
1157 __alloc_pstat(s, __a, "\n");
1158 __alloc_pstat(s, __a, "Bytes allocated: %llu\n",
1159 a->bytes_alloced);
1160 __alloc_pstat(s, __a, "Bytes allocated (real): %llu\n",
1161 a->bytes_alloced_real);
1162 __alloc_pstat(s, __a, "Bytes freed: %llu\n",
1163 a->bytes_freed);
1164
1165 if (lock)
1166 alloc_unlock(__a);
1167}
1168#endif
1169
1170static const struct nvgpu_allocator_ops buddy_ops = {
1171 .alloc = nvgpu_buddy_balloc,
1172 .free = nvgpu_buddy_bfree,
1173
1174 .alloc_fixed = nvgpu_balloc_fixed_buddy,
1175 /* .free_fixed not needed. */
1176
1177 .reserve_carveout = nvgpu_buddy_reserve_co,
1178 .release_carveout = nvgpu_buddy_release_co,
1179
1180 .base = nvgpu_buddy_alloc_base,
1181 .length = nvgpu_buddy_alloc_length,
1182 .end = nvgpu_buddy_alloc_end,
1183 .inited = nvgpu_buddy_alloc_inited,
1184 .space = nvgpu_buddy_alloc_space,
1185
1186 .fini = nvgpu_buddy_allocator_destroy,
1187
1188#ifdef __KERNEL__
1189 .print_stats = nvgpu_buddy_print_stats,
1190#endif
1191};
1192
1193/*
1194 * Initialize a buddy allocator. Returns 0 on success. This allocator does
1195 * not necessarily manage bytes. It manages distinct ranges of resources. This
1196 * allows the allocator to work for things like comp_tags, semaphores, etc.
1197 *
1198 * @allocator: Ptr to an allocator struct to init.
1199 * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to
1200 * get PTE size for GVA spaces.
1201 * @name: Name of the allocator. Doesn't have to be static storage.
1202 * @base: The base address of the resource pool being managed.
1203 * @size: Number of resources in the pool.
1204 * @blk_size: Minimum number of resources to allocate at once. For things like
1205 * semaphores this is 1. For GVA this might be as much as 64k. This
1206 * corresponds to order 0. Must be power of 2.
1207 * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator
 1208 * will try to pick a reasonable max order.
1209 * @flags: Extra flags necessary. See GPU_BALLOC_*.
1210 */
1211int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
1212 struct vm_gk20a *vm, const char *name,
1213 u64 base, u64 size, u64 blk_size,
1214 u64 max_order, u64 flags)
1215{
1216 int err;
1217 u64 pde_size;
1218 struct nvgpu_buddy_allocator *a;
1219
1220 /* blk_size must be greater than 0 and a power of 2. */
1221 if (blk_size == 0)
1222 return -EINVAL;
1223 if (blk_size & (blk_size - 1))
1224 return -EINVAL;
1225
1226 if (max_order > GPU_BALLOC_MAX_ORDER)
1227 return -EINVAL;
1228
1229 /* If this is to manage a GVA space we need a VM. */
1230 if (flags & GPU_ALLOC_GVA_SPACE && !vm)
1231 return -EINVAL;
1232
1233 a = nvgpu_kzalloc(g, sizeof(struct nvgpu_buddy_allocator));
1234 if (!a)
1235 return -ENOMEM;
1236
1237 err = __nvgpu_alloc_common_init(__a, g, name, a, false, &buddy_ops);
1238 if (err)
1239 goto fail;
1240
1241 a->base = base;
1242 a->length = size;
1243 a->blk_size = blk_size;
1244 a->blk_shift = __ffs(blk_size);
1245 a->owner = __a;
1246
1247 /*
 1248 * If base is 0 then modify base to be the size of one block so that we
1249 * can return errors by returning addr == 0.
1250 */
1251 if (a->base == 0) {
1252 a->base = a->blk_size;
1253 a->length -= a->blk_size;
1254 }
1255
1256 a->vm = vm;
1257 if (flags & GPU_ALLOC_GVA_SPACE) {
1258 pde_size = ((u64)vm->big_page_size) << 10;
1259 a->pte_blk_order = balloc_get_order(a, pde_size);
1260 }
1261
1262 /*
1263 * When we have a GVA space with big_pages enabled the size and base
1264 * must be PDE aligned. If big_pages are not enabled then this
1265 * requirement is not necessary.
1266 */
1267 if (flags & GPU_ALLOC_GVA_SPACE && vm->big_pages &&
1268 (base & ((vm->big_page_size << 10) - 1) ||
1269 size & ((vm->big_page_size << 10) - 1)))
1270 return -EINVAL;
1271
1272 a->flags = flags;
1273 a->max_order = max_order;
1274
1275 balloc_allocator_align(a);
1276 balloc_compute_max_order(a);
1277
1278 a->buddy_cache = nvgpu_kmem_cache_create(g, sizeof(struct nvgpu_buddy));
1279 if (!a->buddy_cache) {
1280 err = -ENOMEM;
1281 goto fail;
1282 }
1283
1284 a->alloced_buddies = NULL;
1285 a->fixed_allocs = NULL;
1286 nvgpu_init_list_node(&a->co_list);
1287 err = balloc_init_lists(a);
1288 if (err)
1289 goto fail;
1290
1291 nvgpu_smp_wmb();
1292 a->initialized = 1;
1293
1294#ifdef CONFIG_DEBUG_FS
1295 nvgpu_init_alloc_debug(g, __a);
1296#endif
1297 alloc_dbg(__a, "New allocator: type buddy\n");
1298 alloc_dbg(__a, " base 0x%llx\n", a->base);
1299 alloc_dbg(__a, " size 0x%llx\n", a->length);
1300 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
1301 if (flags & GPU_ALLOC_GVA_SPACE)
1302 alloc_dbg(balloc_owner(a),
1303 " pde_size 0x%llx\n",
1304 balloc_order_to_len(a, a->pte_blk_order));
1305 alloc_dbg(__a, " max_order %llu\n", a->max_order);
1306 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
1307
1308 return 0;
1309
1310fail:
1311 if (a->buddy_cache)
1312 nvgpu_kmem_cache_destroy(a->buddy_cache);
1313 nvgpu_kfree(g, a);
1314 return err;
1315}
1316
1317int nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *a,
1318 const char *name, u64 base, u64 size,
1319 u64 blk_size, u64 flags)
1320{
1321 return __nvgpu_buddy_allocator_init(g, a, NULL, name,
1322 base, size, blk_size, 0, 0);
1323}
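
For reference, balloc_get_order() rounds a byte length up to the next power-of-two multiple of blk_size, which is why bytes_alloced_real can exceed the requested length. Below is a standalone model of that rounding; fls() is open-coded so the sketch runs in userspace, and the 64 KiB block size is just an example:

	#include <stdio.h>
	#include <stdint.h>

	static unsigned fls_u64(uint64_t v)	/* highest set bit, 1-based */
	{
		unsigned r = 0;
		while (v) { v >>= 1; r++; }
		return r;
	}

	int main(void)
	{
		uint64_t blk_size = 64 * 1024, blk_shift = 16;
		uint64_t len = 200 * 1024;	/* requested length */
		uint64_t order = len ? fls_u64((len - 1) >> blk_shift) : 0;
		uint64_t real = (1ULL << order) * blk_size;

		/* Prints: order=2 real=256 KiB for the 200 KiB request */
		printf("order=%llu real=%llu KiB\n",
		       (unsigned long long)order, (unsigned long long)(real >> 10));
		return 0;
	}
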
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h
new file mode 100644
index 00000000..c9e332a5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h
@@ -0,0 +1,222 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#ifndef BUDDY_ALLOCATOR_PRIV_H
24#define BUDDY_ALLOCATOR_PRIV_H
25
26#include <nvgpu/rbtree.h>
27#include <nvgpu/list.h>
28
29struct nvgpu_kmem_cache;
30struct nvgpu_allocator;
31struct vm_gk20a;
32
33/*
34 * Each buddy is an element in a binary tree.
35 */
36struct nvgpu_buddy {
37 struct nvgpu_buddy *parent; /* Parent node. */
38 struct nvgpu_buddy *buddy; /* This node's buddy. */
39 struct nvgpu_buddy *left; /* Lower address sub-node. */
40 struct nvgpu_buddy *right; /* Higher address sub-node. */
41
42 struct nvgpu_list_node buddy_entry; /* List entry for various lists. */
43 struct nvgpu_rbtree_node alloced_entry; /* RB tree of allocations. */
44
45 u64 start; /* Start address of this buddy. */
46 u64 end; /* End address of this buddy. */
47 u64 order; /* Buddy order. */
48
49#define BALLOC_BUDDY_ALLOCED 0x1
50#define BALLOC_BUDDY_SPLIT 0x2
51#define BALLOC_BUDDY_IN_LIST 0x4
52 int flags; /* List of associated flags. */
53
54 /*
55 * Size of the PDE this buddy is using. This allows for grouping like
56 * sized allocations into the same PDE. This uses the gmmu_pgsz_gk20a
57 * enum except for the BALLOC_PTE_SIZE_ANY specifier.
58 */
59#define BALLOC_PTE_SIZE_ANY -1
60 int pte_size;
61};
62
63static inline struct nvgpu_buddy *
64nvgpu_buddy_from_buddy_entry(struct nvgpu_list_node *node)
65{
66 return (struct nvgpu_buddy *)
67 ((uintptr_t)node - offsetof(struct nvgpu_buddy, buddy_entry));
68};
69
70static inline struct nvgpu_buddy *
71nvgpu_buddy_from_rbtree_node(struct nvgpu_rbtree_node *node)
72{
73 return (struct nvgpu_buddy *)
74 ((uintptr_t)node - offsetof(struct nvgpu_buddy, alloced_entry));
75};
76
77#define __buddy_flag_ops(flag, flag_up) \
78 static inline int buddy_is_ ## flag(struct nvgpu_buddy *b) \
79 { \
80 return b->flags & BALLOC_BUDDY_ ## flag_up; \
81 } \
82 static inline void buddy_set_ ## flag(struct nvgpu_buddy *b) \
83 { \
84 b->flags |= BALLOC_BUDDY_ ## flag_up; \
85 } \
86 static inline void buddy_clr_ ## flag(struct nvgpu_buddy *b) \
87 { \
88 b->flags &= ~BALLOC_BUDDY_ ## flag_up; \
89 }
90
91/*
92 * int buddy_is_alloced(struct nvgpu_buddy *b);
93 * void buddy_set_alloced(struct nvgpu_buddy *b);
94 * void buddy_clr_alloced(struct nvgpu_buddy *b);
95 *
96 * int buddy_is_split(struct nvgpu_buddy *b);
97 * void buddy_set_split(struct nvgpu_buddy *b);
98 * void buddy_clr_split(struct nvgpu_buddy *b);
99 *
100 * int buddy_is_in_list(struct nvgpu_buddy *b);
101 * void buddy_set_in_list(struct nvgpu_buddy *b);
102 * void buddy_clr_in_list(struct nvgpu_buddy *b);
103 */
104__buddy_flag_ops(alloced, ALLOCED);
105__buddy_flag_ops(split, SPLIT);
106__buddy_flag_ops(in_list, IN_LIST);
107
108/*
109 * Keeps info for a fixed allocation.
110 */
111struct nvgpu_fixed_alloc {
112 struct nvgpu_list_node buddies; /* List of buddies. */
113 struct nvgpu_rbtree_node alloced_entry; /* RB tree of fixed allocations. */
114
115 u64 start; /* Start of fixed block. */
116 u64 end; /* End address. */
117};
118
119static inline struct nvgpu_fixed_alloc *
120nvgpu_fixed_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node)
121{
122 return (struct nvgpu_fixed_alloc *)
123 ((uintptr_t)node - offsetof(struct nvgpu_fixed_alloc, alloced_entry));
124};
125
126/*
127 * GPU buddy allocator for the various GPU address spaces. Each addressable unit
128 * doesn't have to correspond to a byte. In some cases each unit is a more
129 * complex object such as a comp_tag line or the like.
130 *
131 * The max order is computed from the size of an order 0 buddy (blk_size) and
132 * the size of the address space.
133 *
134 * blk_size is the size of an order 0 buddy.
135 */
136struct nvgpu_buddy_allocator {
137 struct nvgpu_allocator *owner; /* Owner of this buddy allocator. */
138 struct vm_gk20a *vm; /* Parent VM - can be NULL. */
139
140 u64 base; /* Base address of the space. */
141 u64 length; /* Length of the space. */
142 u64 blk_size; /* Size of order 0 allocation. */
143 u64 blk_shift; /* Shift to divide by blk_size. */
144
145 /* Internal stuff. */
146 u64 start; /* Real start (aligned to blk_size). */
147 u64 end; /* Real end, trimmed if needed. */
148 u64 count; /* Count of objects in space. */
149 u64 blks; /* Count of blks in the space. */
150 u64 max_order; /* Specific maximum order. */
151
152 struct nvgpu_rbtree_node *alloced_buddies; /* Outstanding allocations. */
153 struct nvgpu_rbtree_node *fixed_allocs; /* Outstanding fixed allocations. */
154
155 struct nvgpu_list_node co_list;
156
157 struct nvgpu_kmem_cache *buddy_cache;
158
159 /*
160	 * One free list per order, up to and including GPU_BALLOC_MAX_ORDER.
161 */
162#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1)
163
164 struct nvgpu_list_node buddy_list[GPU_BALLOC_ORDER_LIST_LEN];
165 u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN];
166 u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN];
167 u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN];
168
169 /*
170 * This is for when the allocator is managing a GVA space (the
171 * GPU_ALLOC_GVA_SPACE bit is set in @flags). This requires
172 * that we group like sized allocations into PDE blocks.
173 */
174 u64 pte_blk_order;
175
176 int initialized;
177 int alloc_made; /* True after the first alloc. */
178
179 u64 flags;
180
181 u64 bytes_alloced;
182 u64 bytes_alloced_real;
183 u64 bytes_freed;
184};
185
186static inline struct nvgpu_buddy_allocator *buddy_allocator(
187 struct nvgpu_allocator *a)
188{
189 return (struct nvgpu_buddy_allocator *)(a)->priv;
190}
191
192static inline struct nvgpu_list_node *balloc_get_order_list(
193 struct nvgpu_buddy_allocator *a, int order)
194{
195 return &a->buddy_list[order];
196}
197
198static inline u64 balloc_order_to_len(struct nvgpu_buddy_allocator *a,
199 int order)
200{
201	return (1ULL << order) * a->blk_size;
202}
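
To make the order arithmetic above concrete, here is a small standalone sketch (not part of the driver) that mirrors balloc_order_to_len() and derives a max order for a hypothetical 4 KiB block size over a 4 MiB space; the sizes are illustrative only.

/* Standalone sketch: order <-> length math used by the buddy allocator. */
#include <stdio.h>
#include <stdint.h>

static uint64_t order_to_len(uint64_t blk_size, unsigned int order)
{
	/* Same formula as balloc_order_to_len(). */
	return (1ULL << order) * blk_size;
}

int main(void)
{
	uint64_t blk_size = 4096;          /* Hypothetical order-0 size.   */
	uint64_t length   = 4ULL << 20;    /* Hypothetical managed length. */
	uint64_t blks     = length / blk_size;
	unsigned int max_order = 0;

	/* max_order is the largest order that still fits in the space. */
	while ((1ULL << (max_order + 1)) <= blks)
		max_order++;

	printf("blks=%llu max_order=%u order_len(max)=0x%llx\n",
	       (unsigned long long)blks, max_order,
	       (unsigned long long)order_to_len(blk_size, max_order));
	return 0;
}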
203
204static inline u64 balloc_base_shift(struct nvgpu_buddy_allocator *a,
205 u64 base)
206{
207 return base - a->start;
208}
209
210static inline u64 balloc_base_unshift(struct nvgpu_buddy_allocator *a,
211 u64 base)
212{
213 return base + a->start;
214}
215
216static inline struct nvgpu_allocator *balloc_owner(
217 struct nvgpu_buddy_allocator *a)
218{
219 return a->owner;
220}
221
222#endif
diff --git a/drivers/gpu/nvgpu/common/mm/comptags.c b/drivers/gpu/nvgpu/common/mm/comptags.c
new file mode 100644
index 00000000..8f2fe90f
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/comptags.c
@@ -0,0 +1,95 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/bitops.h>
24#include <nvgpu/comptags.h>
25
26#include "gk20a/gk20a.h"
27
28int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator,
29 u32 *offset, u32 len)
30{
31 unsigned long addr;
32 int err = 0;
33
34 nvgpu_mutex_acquire(&allocator->lock);
35 addr = bitmap_find_next_zero_area(allocator->bitmap, allocator->size,
36 0, len, 0);
37 if (addr < allocator->size) {
38 /* number zero is reserved; bitmap base is 1 */
39 *offset = 1 + addr;
40 bitmap_set(allocator->bitmap, addr, len);
41 } else {
42 err = -ENOMEM;
43 }
44 nvgpu_mutex_release(&allocator->lock);
45
46 return err;
47}
48
49void gk20a_comptaglines_free(struct gk20a_comptag_allocator *allocator,
50 u32 offset, u32 len)
51{
52 /* number zero is reserved; bitmap base is 1 */
53 u32 addr = offset - 1;
54
55 WARN_ON(offset == 0);
56 WARN_ON(addr > allocator->size);
57 WARN_ON(addr + len > allocator->size);
58
59 nvgpu_mutex_acquire(&allocator->lock);
60 bitmap_clear(allocator->bitmap, addr, len);
61 nvgpu_mutex_release(&allocator->lock);
62}
63
64int gk20a_comptag_allocator_init(struct gk20a *g,
65 struct gk20a_comptag_allocator *allocator,
66 unsigned long size)
67{
68 nvgpu_mutex_init(&allocator->lock);
69
70 /*
71 * 0th comptag is special and is never used. The base for this bitmap
72	 * is 1, and its size is one less than the size of the comptag store.
73 */
74 size--;
75 allocator->bitmap = nvgpu_vzalloc(g,
76 BITS_TO_LONGS(size) * sizeof(long));
77 if (!allocator->bitmap)
78 return -ENOMEM;
79
80 allocator->size = size;
81
82 return 0;
83}
84
85void gk20a_comptag_allocator_destroy(struct gk20a *g,
86 struct gk20a_comptag_allocator *allocator)
87{
88 /*
89 * called only when exiting the driver (gk20a_remove, or unwinding the
90 * init stage); no users should be active, so taking the mutex is
91 * unnecessary here.
92 */
93 allocator->size = 0;
94 nvgpu_vfree(g, allocator->bitmap);
95}
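
As a usage illustration of the three helpers above, the hedged sketch below initializes a comptag allocator, grabs a run of 64 lines and releases it again. The 1 << 16 store size and the example_ function name are made up for the example, and a valid struct gk20a pointer is assumed.

/*
 * Hedged usage sketch (not from the original source): allocate and release
 * a run of comptag lines. The store size is an illustrative assumption.
 */
static int example_comptag_usage(struct gk20a *g)
{
	struct gk20a_comptag_allocator allocator;
	u32 offset = 0;
	int err;

	err = gk20a_comptag_allocator_init(g, &allocator, 1 << 16);
	if (err)
		return err;

	/* Returned offsets start at 1 because line 0 is reserved. */
	err = gk20a_comptaglines_alloc(&allocator, &offset, 64);
	if (!err)
		gk20a_comptaglines_free(&allocator, offset, 64);

	gk20a_comptag_allocator_destroy(g, &allocator);
	return err;
}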
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
new file mode 100644
index 00000000..568da8c4
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -0,0 +1,920 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <uapi/linux/nvgpu.h>
24
25#include <nvgpu/log.h>
26#include <nvgpu/list.h>
27#include <nvgpu/dma.h>
28#include <nvgpu/gmmu.h>
29#include <nvgpu/nvgpu_mem.h>
30#include <nvgpu/enabled.h>
31#include <nvgpu/page_allocator.h>
32#include <nvgpu/barrier.h>
33#include <nvgpu/vidmem.h>
34
35#include "gk20a/gk20a.h"
36#include "gk20a/mm_gk20a.h"
37
38#define __gmmu_dbg(g, attrs, fmt, args...) \
39 do { \
40 if (attrs->debug) \
41 nvgpu_info(g, fmt, ##args); \
42 else \
43 nvgpu_log(g, gpu_dbg_map, fmt, ##args); \
44 } while (0)
45
46#define __gmmu_dbg_v(g, attrs, fmt, args...) \
47 do { \
48 if (attrs->debug) \
49 nvgpu_info(g, fmt, ##args); \
50 else \
51 nvgpu_log(g, gpu_dbg_map_v, fmt, ##args); \
52 } while (0)
53
54static int pd_allocate(struct vm_gk20a *vm,
55 struct nvgpu_gmmu_pd *pd,
56 const struct gk20a_mmu_level *l,
57 struct nvgpu_gmmu_attrs *attrs);
58static u32 pd_size(const struct gk20a_mmu_level *l,
59 struct nvgpu_gmmu_attrs *attrs);
60/*
61 * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
62 * VA will be allocated for you. If @addr is non-zero then the buffer will be
63 * mapped at @addr.
64 */
65static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
66 struct nvgpu_mem *mem,
67 u64 addr,
68 u64 size,
69 u32 flags,
70 int rw_flag,
71 bool priv,
72 enum nvgpu_aperture aperture)
73{
74 struct gk20a *g = gk20a_from_vm(vm);
75 u64 vaddr;
76
77 struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem);
78
79	if (!sgt)
80		return 0; /* callers treat a zero GPU VA as failure */
81
82 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
83 vaddr = g->ops.mm.gmmu_map(vm, addr,
84 sgt, /* sg list */
85 0, /* sg offset */
86 size,
87 gmmu_page_size_kernel,
88 0, /* kind */
89 0, /* ctag_offset */
90 flags, rw_flag,
91 false, /* clear_ctags */
92 false, /* sparse */
93 priv, /* priv */
94 NULL, /* mapping_batch handle */
95 aperture);
96 nvgpu_mutex_release(&vm->update_gmmu_lock);
97
98 nvgpu_sgt_free(g, sgt);
99
100 if (!vaddr) {
101 nvgpu_err(g, "failed to map buffer!");
102 return 0;
103 }
104
105 return vaddr;
106}
107
108/*
109 * Map a nvgpu_mem into the GMMU. This is for kernel space to use.
110 */
111u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
112 struct nvgpu_mem *mem,
113 u64 size,
114 u32 flags,
115 int rw_flag,
116 bool priv,
117 enum nvgpu_aperture aperture)
118{
119 return __nvgpu_gmmu_map(vm, mem, 0, size, flags, rw_flag, priv,
120 aperture);
121}
122
123/*
124 * Like nvgpu_gmmu_map() except this can work on a fixed address.
125 */
126u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
127 struct nvgpu_mem *mem,
128 u64 addr,
129 u64 size,
130 u32 flags,
131 int rw_flag,
132 bool priv,
133 enum nvgpu_aperture aperture)
134{
135 return __nvgpu_gmmu_map(vm, mem, addr, size, flags, rw_flag, priv,
136 aperture);
137}
138
139void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va)
140{
141 struct gk20a *g = gk20a_from_vm(vm);
142
143 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
144 g->ops.mm.gmmu_unmap(vm,
145 gpu_va,
146 mem->size,
147 gmmu_page_size_kernel,
148 true, /*va_allocated */
149 gk20a_mem_flag_none,
150 false,
151 NULL);
152
153 nvgpu_mutex_release(&vm->update_gmmu_lock);
154}
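
A hedged usage sketch of the kernel-facing pair above: map an existing nvgpu_mem read/write into a VM and unmap it again. The zero flags value, the use of mem->aperture and gk20a_mem_flag_none, and the example_ helper name are assumptions for the example, not driver code.

/*
 * Hedged usage sketch: map an nvgpu_mem into a kernel VM and tear the
 * mapping back down. 'vm' and 'mem' are assumed to be valid already.
 */
static int example_kernel_map(struct vm_gk20a *vm, struct nvgpu_mem *mem)
{
	u64 gpu_va;

	gpu_va = nvgpu_gmmu_map(vm, mem, mem->size,
				0,                   /* no special flags */
				gk20a_mem_flag_none, /* read/write       */
				false,               /* not privileged   */
				mem->aperture);
	if (!gpu_va)
		return -ENOMEM;

	/* ... use the mapping ... */

	nvgpu_gmmu_unmap(vm, mem, gpu_va);
	return 0;
}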
155
156int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
157{
158 u32 pdb_size;
159 int err;
160
161 /*
162 * Need this just for page size. Everything else can be ignored. Also
163 * note that we can just use pgsz 0 (i.e small pages) since the number
164 * of bits present in the top level PDE are the same for small/large
165 * page VMs.
166 */
167 struct nvgpu_gmmu_attrs attrs = {
168 .pgsz = 0,
169 };
170
171 /*
172 * PDB size here must be one page so that its address is page size
173 * aligned. Although lower PDE tables can be aligned at 256B boundaries
174 * the main PDB must be page aligned.
175 */
176 pdb_size = ALIGN(pd_size(&vm->mmu_levels[0], &attrs), PAGE_SIZE);
177
178 err = __nvgpu_pd_cache_alloc_direct(vm->mm->g, &vm->pdb, pdb_size);
179 if (WARN_ON(err))
180 return err;
181
182 /*
183 * One nvgpu_smp_mb() is done after all mapping operations. Don't need
184 * individual barriers for each PD write.
185 */
186 vm->pdb.mem->skip_wmb = true;
187
188 return 0;
189}
190
191/*
192 * Ensure that there's a CPU mapping for the page directory memory. This won't
193 * always be the case on 32-bit systems, where we may need to conserve kernel
194 * virtual address space.
195 */
196static int map_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
197{
198 return nvgpu_mem_begin(g, pd->mem);
199}
200
201/*
202 * Handle any necessary CPU unmap semantics for a page directory's DMA memory.
203 * For 64 bit platforms this is a noop.
204 */
205static void unmap_gmmu_pages(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
206{
207 nvgpu_mem_end(g, pd->mem);
208}
209
210/*
211 * Return the _physical_ address of a page directory.
212 */
213u64 nvgpu_pde_phys_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
214{
215 u64 page_addr;
216
217 if (g->mm.has_physical_mode)
218 page_addr = nvgpu_mem_get_phys_addr(g, pd->mem);
219 else
220 page_addr = nvgpu_mem_get_addr(g, pd->mem);
221
222 return page_addr + pd->mem_offs;
223}
224
225/*
226 * Return the aligned length based on the page size in attrs.
227 */
228static u64 nvgpu_align_map_length(struct vm_gk20a *vm, u64 length,
229 struct nvgpu_gmmu_attrs *attrs)
230{
231 u64 page_size = vm->gmmu_page_sizes[attrs->pgsz];
232
233 return ALIGN(length, page_size);
234}
235
236static u32 pd_entries(const struct gk20a_mmu_level *l,
237 struct nvgpu_gmmu_attrs *attrs)
238{
239 /*
240 * Number of entries in a PD is easy to compute from the number of bits
241 * used to index the page directory. That is simply 2 raised to the
242 * number of bits.
243 */
244 return 1UL << (l->hi_bit[attrs->pgsz] - l->lo_bit[attrs->pgsz] + 1UL);
245}
246
247/*
248 * Computes the size of a PD table.
249 */
250static u32 pd_size(const struct gk20a_mmu_level *l,
251 struct nvgpu_gmmu_attrs *attrs)
252{
253 return pd_entries(l, attrs) * l->entry_size;
254}
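
The standalone sketch below plugs hypothetical numbers into the entry-count formula above (VA bits 38..30 indexing a level with 8-byte entries), yielding 512 entries and a 4 KiB table; the bit positions and entry size are illustrative, not taken from a particular chip.

/* Standalone sketch of the pd_entries()/pd_size() arithmetic. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical level description: VA bits 38..30, 8-byte entries. */
	unsigned int hi_bit = 38, lo_bit = 30, entry_size = 8;

	uint32_t entries = 1U << (hi_bit - lo_bit + 1); /* 2^(#index bits) */
	uint32_t size    = entries * entry_size;

	printf("entries=%u table size=%u bytes\n", entries, size);
	return 0;
}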
255
256/*
257 * Allocate a physically contiguous region big enough for a gmmu page table
258 * of the specified level and page size. The whole range is zeroed so that any
259 * accesses will fault until proper values are programmed.
260 */
261static int pd_allocate(struct vm_gk20a *vm,
262 struct nvgpu_gmmu_pd *pd,
263 const struct gk20a_mmu_level *l,
264 struct nvgpu_gmmu_attrs *attrs)
265{
266 int err;
267
268 if (pd->mem)
269 return 0;
270
271 err = __nvgpu_pd_alloc(vm, pd, pd_size(l, attrs));
272 if (err) {
273 nvgpu_info(vm->mm->g, "error allocating page directory!");
274 return err;
275 }
276
277 /*
278 * One nvgpu_smp_mb() is done after all mapping operations. Don't need
279 * individual barriers for each PD write.
280 */
281 pd->mem->skip_wmb = true;
282
283 return 0;
284}
285
286/*
287 * Compute what page directory index at the passed level the passed virtual
288 * address corresponds to. @attrs is necessary for determining the page size
289 * which is used to pick the right bit offsets for the GMMU level.
290 */
291static u32 pd_index(const struct gk20a_mmu_level *l, u64 virt,
292 struct nvgpu_gmmu_attrs *attrs)
293{
294 u64 pd_mask = (1ULL << ((u64)l->hi_bit[attrs->pgsz] + 1)) - 1ULL;
295	u32 pd_shift = (u32)l->lo_bit[attrs->pgsz];
296
297 /*
298 * For convenience we don't bother computing the lower bound of the
299 * mask; it's easier to just shift it off.
300 */
301 return (virt & pd_mask) >> pd_shift;
302}
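
A worked example of the index computation above, as a standalone sketch: with a hypothetical level covering VA bits 38..30, the address 0x1_4000_0000 falls into PDE index 5.

/* Standalone sketch of pd_index(): which PDE covers a given GPU VA? */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int hi_bit = 38, lo_bit = 30;  /* hypothetical level */
	uint64_t virt = 0x140000000ULL;         /* 5 GiB, for example */

	uint64_t pd_mask  = (1ULL << (hi_bit + 1)) - 1ULL;
	uint32_t pd_shift = lo_bit;
	uint32_t pd_idx   = (uint32_t)((virt & pd_mask) >> pd_shift);

	printf("VA 0x%llx -> PDE index %u\n",
	       (unsigned long long)virt, pd_idx);  /* prints index 5 */
	return 0;
}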
303
304static int pd_allocate_children(struct vm_gk20a *vm,
305 const struct gk20a_mmu_level *l,
306 struct nvgpu_gmmu_pd *pd,
307 struct nvgpu_gmmu_attrs *attrs)
308{
309 struct gk20a *g = gk20a_from_vm(vm);
310
311 if (pd->entries)
312 return 0;
313
314 pd->num_entries = pd_entries(l, attrs);
315 pd->entries = nvgpu_vzalloc(g, sizeof(struct nvgpu_gmmu_pd) *
316 pd->num_entries);
317 if (!pd->entries)
318 return -ENOMEM;
319
320 return 0;
321}
322
323/*
324 * This function programs the GMMU based on two ranges: a physical range and a
325 * GPU virtual range. The virtual is mapped to the physical. Physical in this
326 * case can mean either a real physical sysmem address or an IO virtual address
327 * (for instance when a system has an IOMMU running).
328 *
329 * The rest of the parameters are for describing the actual mapping itself.
330 *
331 * This function recursively calls itself for handling PDEs. At the final level
332 * a PTE handler is called. The phys and virt ranges are adjusted for each
333 * recursion so that each invocation of this function need only worry about the
334 * range it is passed.
335 *
336 * phys_addr will always point to a contiguous range - the discontiguous nature
337 * of DMA buffers is taken care of at the layer above this.
338 */
339static int __set_pd_level(struct vm_gk20a *vm,
340 struct nvgpu_gmmu_pd *pd,
341 int lvl,
342 u64 phys_addr,
343 u64 virt_addr, u64 length,
344 struct nvgpu_gmmu_attrs *attrs)
345{
346 int err = 0;
347 u64 pde_range;
348 struct gk20a *g = gk20a_from_vm(vm);
349 struct nvgpu_gmmu_pd *next_pd = NULL;
350 const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
351 const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl + 1];
352
353 /*
354	 * Up to 5 levels on Pascal+; pre-Pascal chips have only 2. These
355	 * indentation strings feed the page table debugging prints, making it
356	 * easier to see which level a print comes from.
357 */
358 static const char *__lvl_debug[] = {
359 "", /* L=0 */
360 " ", /* L=1 */
361 " ", /* L=2 */
362 " ", /* L=3 */
363 " ", /* L=4 */
364 };
365
366 pde_range = 1ULL << (u64)l->lo_bit[attrs->pgsz];
367
368 __gmmu_dbg_v(g, attrs,
369 "L=%d %sGPU virt %#-12llx +%#-9llx -> phys %#-12llx",
370 lvl,
371 __lvl_debug[lvl],
372 virt_addr,
373 length,
374 phys_addr);
375
376 /*
377 * Iterate across the mapping in chunks the size of this level's PDE.
378 * For each of those chunks program our level's PDE and then, if there's
379 * a next level, program the next level's PDEs/PTEs.
380 */
381 while (length) {
382 u32 pd_idx = pd_index(l, virt_addr, attrs);
383 u64 chunk_size;
384 u64 target_addr;
385
386 /*
387 * Truncate the pde_range when the virtual address does not
388 * start at a PDE boundary.
389 */
390 chunk_size = min(length,
391 pde_range - (virt_addr & (pde_range - 1)));
392
393 /*
394 * If the next level has an update_entry function then we know
395 * that _this_ level points to PDEs (not PTEs). Thus we need to
396 * have a bunch of children PDs.
397 */
398 if (next_l->update_entry) {
399 if (pd_allocate_children(vm, l, pd, attrs))
400 return -ENOMEM;
401
402 /*
403 * Get the next PD so that we know what to put in this
404 * current PD. If the next level is actually PTEs then
405 * we don't need this - we will just use the real
406 * physical target.
407 */
408 next_pd = &pd->entries[pd_idx];
409
410 /*
411 * Allocate the backing memory for next_pd.
412 */
413 if (pd_allocate(vm, next_pd, next_l, attrs))
414 return -ENOMEM;
415 }
416
417 /*
418 * This is the address we want to program into the actual PDE/
419 * PTE. When the next level is PDEs we need the target address
420 * to be the table of PDEs. When the next level is PTEs the
421 * target addr is the real physical address we are aiming for.
422 */
423 target_addr = next_pd ?
424 nvgpu_pde_phys_addr(g, next_pd) :
425 phys_addr;
426
427 l->update_entry(vm, l,
428 pd, pd_idx,
429 virt_addr,
430 target_addr,
431 attrs);
432
433 if (next_l->update_entry) {
434 err = map_gmmu_pages(g, next_pd);
435 if (err) {
436 nvgpu_err(g,
437 "couldn't map ptes for update as=%d",
438 vm_aspace_id(vm));
439 return err;
440 }
441
442 err = __set_pd_level(vm, next_pd,
443 lvl + 1,
444 phys_addr,
445 virt_addr,
446 chunk_size,
447 attrs);
448 unmap_gmmu_pages(g, next_pd);
449
450 if (err)
451 return err;
452 }
453
454 virt_addr += chunk_size;
455
456 /*
457 * Only add to phys_addr if it's non-zero. A zero value implies
458		 * we are unmapping, and as a result we don't want to place
459 * non-zero phys addresses in the PTEs. A non-zero phys-addr
460 * would also confuse the lower level PTE programming code.
461 */
462 if (phys_addr)
463 phys_addr += chunk_size;
464 length -= chunk_size;
465 }
466
467 __gmmu_dbg_v(g, attrs, "L=%d %s%s", lvl, __lvl_debug[lvl], "ret!");
468
469 return 0;
470}
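
To illustrate just the chunking step of the loop above, here is a standalone sketch that walks a 4 MiB mapping starting at an unaligned address through hypothetical 2 MiB PDEs; the first chunk is truncated to the next PDE boundary exactly as the in-loop comment describes.

/* Standalone sketch of the PDE-sized chunking loop in __set_pd_level(). */
#include <stdio.h>
#include <stdint.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	uint64_t pde_range = 1ULL << 21;    /* hypothetical 2 MiB PDE */
	uint64_t virt      = 0x00301000ULL; /* not PDE aligned        */
	uint64_t length    = 4ULL << 20;    /* 4 MiB mapping          */

	while (length) {
		/* First chunk is truncated to reach the next PDE boundary. */
		uint64_t chunk = MIN(length,
				     pde_range - (virt & (pde_range - 1)));

		printf("chunk at 0x%llx, size 0x%llx\n",
		       (unsigned long long)virt, (unsigned long long)chunk);

		virt   += chunk;
		length -= chunk;
	}
	return 0;
}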
471
472static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
473 struct nvgpu_sgt *sgt,
474 u64 space_to_skip,
475 u64 virt_addr,
476 u64 length,
477 struct nvgpu_gmmu_attrs *attrs)
478{
479 struct gk20a *g = gk20a_from_vm(vm);
480 void *sgl;
481 int err = 0;
482
483 if (!sgt) {
484 /*
485 * This is considered an unmap. Just pass in 0 as the physical
486 * address for the entire GPU range.
487 */
488 err = __set_pd_level(vm, &vm->pdb,
489 0,
490 0,
491 virt_addr, length,
492 attrs);
493 return err;
494 }
495
496 /*
497 * At this point we have a scatter-gather list pointing to some number
498 * of discontiguous chunks of memory. We must iterate over that list and
499 * generate a GMMU map call for each chunk. There are two possibilities:
500 * either an IOMMU is enabled or not. When an IOMMU is enabled the
501 * mapping is simple since the "physical" address is actually a virtual
502 * IO address and will be contiguous.
503 */
504 if (attrs->aperture == APERTURE_SYSMEM && !g->mm.bypass_smmu) {
505 u64 io_addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, attrs);
506
507 io_addr += space_to_skip;
508
509 err = __set_pd_level(vm, &vm->pdb,
510 0,
511 io_addr,
512 virt_addr,
513 length,
514 attrs);
515
516 return err;
517 }
518
519 /*
520 * Finally: last possible case: do the no-IOMMU mapping. In this case we
521 * really are mapping physical pages directly.
522 */
523 nvgpu_sgt_for_each_sgl(sgl, sgt) {
524 u64 phys_addr;
525 u64 chunk_length;
526
527 /*
528 * Cut out sgl ents for space_to_skip.
529 */
530 if (space_to_skip &&
531 space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
532 space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
533 continue;
534 }
535
536 phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
537 chunk_length = min(length,
538 nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
539
540 err = __set_pd_level(vm, &vm->pdb,
541 0,
542 phys_addr,
543 virt_addr,
544 chunk_length,
545 attrs);
546 if (err)
547 break;
548
549 /* Space has been skipped so zero this for future chunks. */
550 space_to_skip = 0;
551
552 /*
553 * Update the map pointer and the remaining length.
554 */
555 virt_addr += chunk_length;
556 length -= chunk_length;
557
558 if (length == 0)
559 break;
560 }
561
562 return err;
563}
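
The no-IOMMU branch above can be pictured with a standalone sketch: an array of made-up (phys, len) chunks stands in for the scatter-gather list, and the leading space_to_skip bytes are dropped before mapping begins, just as the loop above does.

/* Standalone sketch of the scatter-gather walk with a leading skip. */
#include <stdio.h>
#include <stdint.h>

struct chunk { uint64_t phys; uint64_t len; };

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	/* Hypothetical discontiguous buffer. */
	struct chunk sgl[] = {
		{ 0x80000000ULL, 0x10000 },
		{ 0x90000000ULL, 0x20000 },
		{ 0xa0000000ULL, 0x10000 },
	};
	uint64_t space_to_skip = 0x18000;   /* starts inside chunk 1 */
	uint64_t length        = 0x20000;
	uint64_t virt          = 0x1000000ULL;
	unsigned int i;

	for (i = 0; i < sizeof(sgl) / sizeof(sgl[0]) && length; i++) {
		uint64_t phys, map_len;

		/* Whole chunks that fall inside the skip are dropped. */
		if (space_to_skip && space_to_skip >= sgl[i].len) {
			space_to_skip -= sgl[i].len;
			continue;
		}

		phys    = sgl[i].phys + space_to_skip;
		map_len = MIN(length, sgl[i].len - space_to_skip);

		printf("map virt 0x%llx -> phys 0x%llx (+0x%llx)\n",
		       (unsigned long long)virt,
		       (unsigned long long)phys,
		       (unsigned long long)map_len);

		space_to_skip = 0;
		virt   += map_len;
		length -= map_len;
	}
	return 0;
}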
564
565/*
566 * This is the true top level GMMU mapping logic. This breaks down the incoming
567 * scatter gather table and does actual programming of GPU virtual address to
568 * physical* address.
569 *
570 * The update of each level of the page tables is farmed out to chip specific
571 * implementations. But the logic around that is generic to all chips. Every
572 * chip has some number of PDE levels and then a PTE level.
573 *
574 * Each chunk of the incoming SGL is sent to the chip specific implementation
575 * of page table update.
576 *
577 * [*] Note: the "physical" address may actually be an IO virtual address in the
578 * case of SMMU usage.
579 */
580static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
581 struct nvgpu_sgt *sgt,
582 u64 space_to_skip,
583 u64 virt_addr,
584 u64 length,
585 struct nvgpu_gmmu_attrs *attrs)
586{
587 struct gk20a *g = gk20a_from_vm(vm);
588 u32 page_size;
589 int err;
590
591 /* note: here we need to map kernel to small, since the
592 * low-level mmu code assumes 0 is small and 1 is big pages */
593 if (attrs->pgsz == gmmu_page_size_kernel)
594 attrs->pgsz = gmmu_page_size_small;
595
596 page_size = vm->gmmu_page_sizes[attrs->pgsz];
597
598 if (space_to_skip & (page_size - 1))
599 return -EINVAL;
600
601 /*
602 * Update length to be aligned to the passed page size.
603 */
604 length = nvgpu_align_map_length(vm, length, attrs);
605
606 err = map_gmmu_pages(g, &vm->pdb);
607 if (err) {
608 nvgpu_err(g, "couldn't map ptes for update as=%d",
609 vm_aspace_id(vm));
610 return err;
611 }
612
613 __gmmu_dbg(g, attrs,
614 "vm=%s "
615 "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx "
616 "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
617 "kind=%#02x APT=%-6s %c%c%c%c%c",
618 vm->name,
619 sgt ? "MAP" : "UNMAP",
620 virt_addr,
621 length,
622 sgt ? nvgpu_sgt_get_phys(sgt, sgt->sgl) : 0,
623 space_to_skip,
624 page_size >> 10,
625 nvgpu_gmmu_perm_str(attrs->rw_flag),
626 attrs->kind_v,
627 nvgpu_aperture_str(attrs->aperture),
628		   attrs->cacheable ? 'C' : 'c', /* C = cached, c = volatile. */
629 attrs->sparse ? 'S' : '-',
630 attrs->priv ? 'P' : '-',
631 attrs->coherent ? 'c' : '-',
632 attrs->valid ? 'V' : '-');
633
634 err = __nvgpu_gmmu_do_update_page_table(vm,
635 sgt,
636 space_to_skip,
637 virt_addr,
638 length,
639 attrs);
640
641 unmap_gmmu_pages(g, &vm->pdb);
642 nvgpu_smp_mb();
643
644 __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");
645
646 return err;
647}
648
649/**
650 * gk20a_locked_gmmu_map - Map a buffer into the GMMU
651 *
652 * This is for non-vGPU chips. It's part of the HAL at the moment but really
653 * should not be. Chip specific stuff is handled at the PTE/PDE programming
654 * layer. The rest of the logic is essentially generic for all chips.
655 *
656 * To call this function you must have locked the VM lock: vm->update_gmmu_lock.
657 * However, note: this function is not called directly. It's used through the
658 * mm.gmmu_map() HAL. So before calling the mm.gmmu_map() HAL make sure you
659 * have the update_gmmu_lock acquired.
660 */
661u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
662 u64 vaddr,
663 struct nvgpu_sgt *sgt,
664 u64 buffer_offset,
665 u64 size,
666 int pgsz_idx,
667 u8 kind_v,
668 u32 ctag_offset,
669 u32 flags,
670 int rw_flag,
671 bool clear_ctags,
672 bool sparse,
673 bool priv,
674 struct vm_gk20a_mapping_batch *batch,
675 enum nvgpu_aperture aperture)
676{
677 struct gk20a *g = gk20a_from_vm(vm);
678 int err = 0;
679 bool allocated = false;
680 int ctag_granularity = g->ops.fb.compression_page_size(g);
681 struct nvgpu_gmmu_attrs attrs = {
682 .pgsz = pgsz_idx,
683 .kind_v = kind_v,
684 .ctag = (u64)ctag_offset * (u64)ctag_granularity,
685 .cacheable = flags & NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE,
686 .rw_flag = rw_flag,
687 .sparse = sparse,
688 .priv = priv,
689 .coherent = flags & NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT,
690 .valid = !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE),
691 .aperture = aperture
692 };
693
694#ifdef CONFIG_TEGRA_19x_GPU
695 nvgpu_gmmu_add_t19x_attrs(&attrs, flags);
696#endif
697
698 /*
699 * Only allocate a new GPU VA range if we haven't already been passed a
700 * GPU VA range. This facilitates fixed mappings.
701 */
702 if (!vaddr) {
703 vaddr = __nvgpu_vm_alloc_va(vm, size, pgsz_idx);
704 if (!vaddr) {
705 nvgpu_err(g, "failed to allocate va space");
706 err = -ENOMEM;
707 goto fail_alloc;
708 }
709 allocated = true;
710 }
711
712 err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset,
713 vaddr, size, &attrs);
714 if (err) {
715 nvgpu_err(g, "failed to update ptes on map");
716 goto fail_validate;
717 }
718
719 if (!batch)
720 g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
721 else
722 batch->need_tlb_invalidate = true;
723
724 return vaddr;
725
726fail_validate:
727 if (allocated)
728 __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
729fail_alloc:
730 nvgpu_err(g, "%s: failed with err=%d", __func__, err);
731 return 0;
732}
733
734void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
735 u64 vaddr,
736 u64 size,
737 int pgsz_idx,
738 bool va_allocated,
739 int rw_flag,
740 bool sparse,
741 struct vm_gk20a_mapping_batch *batch)
742{
743 int err = 0;
744 struct gk20a *g = gk20a_from_vm(vm);
745 struct nvgpu_gmmu_attrs attrs = {
746 .pgsz = pgsz_idx,
747 .kind_v = 0,
748 .ctag = 0,
749 .cacheable = 0,
750 .rw_flag = rw_flag,
751 .sparse = sparse,
752 .priv = 0,
753 .coherent = 0,
754 .valid = 0,
755 .aperture = APERTURE_INVALID,
756 };
757
758 if (va_allocated) {
759 err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
760 if (err) {
761 nvgpu_err(g, "failed to free va");
762 return;
763 }
764 }
765
766 /* unmap here needs to know the page size we assigned at mapping */
767 err = __nvgpu_gmmu_update_page_table(vm, NULL, 0,
768 vaddr, size, &attrs);
769 if (err)
770 nvgpu_err(g, "failed to update gmmu ptes on unmap");
771
772 if (!batch) {
773 gk20a_mm_l2_flush(g, true);
774 g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
775 } else {
776 if (!batch->gpu_l2_flushed) {
777 gk20a_mm_l2_flush(g, true);
778 batch->gpu_l2_flushed = true;
779 }
780 batch->need_tlb_invalidate = true;
781 }
782}
783
784u32 __nvgpu_pte_words(struct gk20a *g)
785{
786 const struct gk20a_mmu_level *l = g->ops.mm.get_mmu_levels(g, SZ_64K);
787 const struct gk20a_mmu_level *next_l;
788
789 /*
790 * Iterate to the bottom GMMU level - the PTE level. The levels array
791 * is always NULL terminated (by the update_entry function).
792 */
793 do {
794 next_l = l + 1;
795 if (!next_l->update_entry)
796 break;
797
798 l++;
799 } while (true);
800
801 return (u32)(l->entry_size / sizeof(u32));
802}
803
804/*
805 * Recursively walk the pages tables to find the PTE.
806 */
807static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
808 struct nvgpu_gmmu_pd *pd,
809 u64 vaddr, int lvl,
810 struct nvgpu_gmmu_attrs *attrs,
811 u32 *data,
812 struct nvgpu_gmmu_pd **pd_out, u32 *pd_idx_out,
813 u32 *pd_offs_out)
814{
815 const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
816 const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl + 1];
817 u32 pd_idx = pd_index(l, vaddr, attrs);
818 u32 pte_base;
819 u32 pte_size;
820 u32 i;
821
822 /*
823 * If this isn't the final level (i.e there's a valid next level)
824 * then find the next level PD and recurse.
825 */
826 if (next_l->update_entry) {
827 struct nvgpu_gmmu_pd *pd_next = pd->entries + pd_idx;
828
829 /* Invalid entry! */
830 if (!pd_next->mem)
831 return -EINVAL;
832
833 attrs->pgsz = l->get_pgsz(g, pd, pd_idx);
834
835 if (attrs->pgsz >= gmmu_nr_page_sizes)
836 return -EINVAL;
837
838 return __nvgpu_locate_pte(g, vm, pd_next,
839 vaddr, lvl + 1, attrs,
840 data, pd_out, pd_idx_out,
841 pd_offs_out);
842 }
843
844 if (!pd->mem)
845 return -EINVAL;
846
847 /*
848 * Take into account the real offset into the nvgpu_mem since the PD
849 * may be located at an offset other than 0 (due to PD packing).
850 */
851 pte_base = (pd->mem_offs / sizeof(u32)) +
852 pd_offset_from_index(l, pd_idx);
853 pte_size = (u32)(l->entry_size / sizeof(u32));
854
855 if (data) {
856 map_gmmu_pages(g, pd);
857 for (i = 0; i < pte_size; i++)
858 data[i] = nvgpu_mem_rd32(g, pd->mem, pte_base + i);
859 unmap_gmmu_pages(g, pd);
860 }
861
862 if (pd_out)
863 *pd_out = pd;
864
865 if (pd_idx_out)
866 *pd_idx_out = pd_idx;
867
868 if (pd_offs_out)
869 *pd_offs_out = pd_offset_from_index(l, pd_idx);
870
871 return 0;
872}
873
874int __nvgpu_get_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
875{
876 struct nvgpu_gmmu_attrs attrs = {
877 .pgsz = 0,
878 };
879
880 return __nvgpu_locate_pte(g, vm, &vm->pdb,
881 vaddr, 0, &attrs,
882 pte, NULL, NULL, NULL);
883}
884
885int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
886{
887 struct nvgpu_gmmu_pd *pd;
888 u32 pd_idx, pd_offs, pte_size, i;
889 int err;
890 struct nvgpu_gmmu_attrs attrs = {
891 .pgsz = 0,
892 };
893 struct nvgpu_gmmu_attrs *attrs_ptr = &attrs;
894
895 err = __nvgpu_locate_pte(g, vm, &vm->pdb,
896 vaddr, 0, &attrs,
897 NULL, &pd, &pd_idx, &pd_offs);
898 if (err)
899 return err;
900
901 pte_size = __nvgpu_pte_words(g);
902
903 map_gmmu_pages(g, pd);
904 for (i = 0; i < pte_size; i++) {
905 pd_write(g, pd, pd_offs + i, pte[i]);
906 pte_dbg(g, attrs_ptr,
907 "PTE: idx=%-4u (%d) 0x%08x", pd_idx, i, pte[i]);
908 }
909 unmap_gmmu_pages(g, pd);
910
911 /*
912 * Ensures the pd_write()s are done. The pd_write() does not do this
913 * since generally there's lots of pd_write()s called one after another.
914	 * A TLB invalidate is likely also required, but that is left to the
915	 * caller of this function.
916 */
917 nvgpu_smp_wmb();
918
919 return 0;
920}
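
A hedged usage sketch of __nvgpu_get_pte()/__nvgpu_set_pte(), including the TLB invalidate that the comment above leaves to the caller. The 8-word buffer is a generous assumption (callers should size it from __nvgpu_pte_words()), and the example_ helper is not part of the driver.

/*
 * Hedged usage sketch: read the PTE for a VA, tweak it, write it back, and
 * invalidate the TLB (which, as noted above, is the caller's job).
 */
static int example_rewrite_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr)
{
	u32 pte[8] = { 0 };  /* assumed large enough; see __nvgpu_pte_words() */
	int err;

	err = __nvgpu_get_pte(g, vm, vaddr, pte);
	if (err)
		return err;

	/* ... modify pte[] here as needed ... */

	err = __nvgpu_set_pte(g, vm, vaddr, pte);
	if (err)
		return err;

	g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
	return 0;
}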
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
new file mode 100644
index 00000000..3eb10fc4
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
@@ -0,0 +1,225 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/atomic.h>
24#include <nvgpu/allocator.h>
25#include <nvgpu/kmem.h>
26#include <nvgpu/barrier.h>
27
28#include "lockless_allocator_priv.h"
29
30static u64 nvgpu_lockless_alloc_length(struct nvgpu_allocator *a)
31{
32 struct nvgpu_lockless_allocator *pa = a->priv;
33
34 return pa->length;
35}
36
37static u64 nvgpu_lockless_alloc_base(struct nvgpu_allocator *a)
38{
39 struct nvgpu_lockless_allocator *pa = a->priv;
40
41 return pa->base;
42}
43
44static int nvgpu_lockless_alloc_inited(struct nvgpu_allocator *a)
45{
46 struct nvgpu_lockless_allocator *pa = a->priv;
47 int inited = pa->inited;
48
49 nvgpu_smp_rmb();
50 return inited;
51}
52
53static u64 nvgpu_lockless_alloc_end(struct nvgpu_allocator *a)
54{
55 struct nvgpu_lockless_allocator *pa = a->priv;
56
57 return pa->base + pa->length;
58}
59
60static u64 nvgpu_lockless_alloc(struct nvgpu_allocator *a, u64 len)
61{
62 struct nvgpu_lockless_allocator *pa = a->priv;
63 int head, new_head, ret;
64 u64 addr = 0;
65
66 if (len != pa->blk_size)
67 return 0;
68
69 head = NV_ACCESS_ONCE(pa->head);
70 while (head >= 0) {
71 new_head = NV_ACCESS_ONCE(pa->next[head]);
72 ret = cmpxchg(&pa->head, head, new_head);
73 if (ret == head) {
74 addr = pa->base + head * pa->blk_size;
75 nvgpu_atomic_inc(&pa->nr_allocs);
76 alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head,
77 addr);
78 break;
79 }
80 head = NV_ACCESS_ONCE(pa->head);
81 }
82
83 if (addr)
84 alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head, addr);
85 else
86 alloc_dbg(a, "Alloc failed!\n");
87
88 return addr;
89}
90
91static void nvgpu_lockless_free(struct nvgpu_allocator *a, u64 addr)
92{
93 struct nvgpu_lockless_allocator *pa = a->priv;
94 int head, ret;
95 u64 cur_idx;
96
97 cur_idx = (addr - pa->base) / pa->blk_size;
98
99 alloc_dbg(a, "Free node # %llu @ addr 0x%llx\n", cur_idx, addr);
100
101 while (1) {
102 head = NV_ACCESS_ONCE(pa->head);
103 NV_ACCESS_ONCE(pa->next[cur_idx]) = head;
104 ret = cmpxchg(&pa->head, head, cur_idx);
105 if (ret == head) {
106 nvgpu_atomic_dec(&pa->nr_allocs);
107 alloc_dbg(a, "Free node # %llu\n", cur_idx);
108 break;
109 }
110 }
111}
112
113static void nvgpu_lockless_alloc_destroy(struct nvgpu_allocator *a)
114{
115 struct nvgpu_lockless_allocator *pa = a->priv;
116
117#ifdef CONFIG_DEBUG_FS
118 nvgpu_fini_alloc_debug(a);
119#endif
120
121 nvgpu_vfree(a->g, pa->next);
122 nvgpu_kfree(nvgpu_alloc_to_gpu(a), pa);
123}
124
125#ifdef __KERNEL__
126static void nvgpu_lockless_print_stats(struct nvgpu_allocator *a,
127 struct seq_file *s, int lock)
128{
129 struct nvgpu_lockless_allocator *pa = a->priv;
130
131 __alloc_pstat(s, a, "Lockless allocator params:\n");
132 __alloc_pstat(s, a, " start = 0x%llx\n", pa->base);
133 __alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length);
134
135 /* Actual stats. */
136 __alloc_pstat(s, a, "Stats:\n");
137 __alloc_pstat(s, a, " Number allocs = %d\n",
138 nvgpu_atomic_read(&pa->nr_allocs));
139 __alloc_pstat(s, a, " Number free = %d\n",
140 pa->nr_nodes - nvgpu_atomic_read(&pa->nr_allocs));
141}
142#endif
143
144static const struct nvgpu_allocator_ops pool_ops = {
145 .alloc = nvgpu_lockless_alloc,
146 .free = nvgpu_lockless_free,
147
148 .base = nvgpu_lockless_alloc_base,
149 .length = nvgpu_lockless_alloc_length,
150 .end = nvgpu_lockless_alloc_end,
151 .inited = nvgpu_lockless_alloc_inited,
152
153 .fini = nvgpu_lockless_alloc_destroy,
154
155#ifdef __KERNEL__
156 .print_stats = nvgpu_lockless_print_stats,
157#endif
158};
159
160int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
161 const char *name, u64 base, u64 length,
162 u64 blk_size, u64 flags)
163{
164 int i;
165 int err;
166 int nr_nodes;
167 u64 count;
168 struct nvgpu_lockless_allocator *a;
169
170 if (!blk_size)
171 return -EINVAL;
172
173 /*
174 * Ensure we have space for at least one node & there's no overflow.
175 * In order to control memory footprint, we require count < INT_MAX
176 */
177 count = length / blk_size;
178 if (!base || !count || count > INT_MAX)
179 return -EINVAL;
180
181 a = nvgpu_kzalloc(g, sizeof(struct nvgpu_lockless_allocator));
182 if (!a)
183 return -ENOMEM;
184
185 err = __nvgpu_alloc_common_init(__a, g, name, a, false, &pool_ops);
186 if (err)
187 goto fail;
188
189 a->next = nvgpu_vzalloc(g, sizeof(*a->next) * count);
190 if (!a->next) {
191 err = -ENOMEM;
192 goto fail;
193 }
194
195 /* chain the elements together to form the initial free list */
196 nr_nodes = (int)count;
197 for (i = 0; i < nr_nodes; i++)
198 a->next[i] = i + 1;
199 a->next[nr_nodes - 1] = -1;
200
201 a->base = base;
202 a->length = length;
203 a->blk_size = blk_size;
204 a->nr_nodes = nr_nodes;
205 a->flags = flags;
206 nvgpu_atomic_set(&a->nr_allocs, 0);
207
208 nvgpu_smp_wmb();
209 a->inited = true;
210
211#ifdef CONFIG_DEBUG_FS
212 nvgpu_init_alloc_debug(g, __a);
213#endif
214 alloc_dbg(__a, "New allocator: type lockless\n");
215 alloc_dbg(__a, " base 0x%llx\n", a->base);
216 alloc_dbg(__a, " nodes %d\n", a->nr_nodes);
217 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
218 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
219
220 return 0;
221
222fail:
223 nvgpu_kfree(g, a);
224 return err;
225}
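
A hedged usage sketch of the allocator above: carve a fixed-size pool out of an address range and allocate/free one block. The base/length/block size values are illustrative, and nvgpu_alloc_destroy() is assumed to be the usual ops->fini wrapper (it is not shown in this diff).

/*
 * Hedged usage sketch (not from the original source). All numeric values
 * are made up; real users embed the nvgpu_allocator in a larger structure.
 */
static int example_lockless_usage(struct gk20a *g)
{
	struct nvgpu_allocator alloc;
	u64 addr;
	int err;

	err = nvgpu_lockless_allocator_init(g, &alloc, "demo",
					    0x100000, /* base     */
					    0x100000, /* length   */
					    0x1000,   /* blk_size */
					    0);
	if (err)
		return err;

	addr = nvgpu_alloc(&alloc, 0x1000);  /* len must equal blk_size */
	if (addr)
		nvgpu_free(&alloc, addr);

	nvgpu_alloc_destroy(&alloc);         /* assumed fini wrapper */
	return 0;
}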
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h
new file mode 100644
index 00000000..c2f6649a
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h
@@ -0,0 +1,127 @@
1/*
2 * Copyright (c) 2016 - 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23/*
24 * Basics:
25 *
26 * - Lockless memory allocator for fixed-size structures, whose
27 * size is defined up front at init time.
28 * - Memory footprint scales linearly w/ the number of structures in
29 * the pool. It is ~= sizeof(int) * N.
30 * - Memory is pre-allocated by the client. The allocator itself
31 * only computes the addresses for allocations.
32 * - Limit of INT_MAX nodes that the allocator can be responsible for.
33 *
34 * Implementation details:
35 *
36 * The allocator maintains a single list of free nodes. We allocate &
37 * free nodes from the head of the list. We rely on the cmpxchg() operator
38 * to maintain atomicity on the head.
39 *
40 * So, both allocs & frees are O(1)!!
41 *
42 * -- Definitions --
43 * Block Size - size of a single structure that this allocator will
44 * allocate.
45 * Node - one of the elements of size blk_size in the
46 * client-allocated buffer.
47 * Node Index - zero-based index of a node in the client-allocated
48 * contiguous buffer.
49 *
50 * -- Initial State --
51 * We maintain the following to track the state of the free list:
52 *
53 * 1) A "head" index to track the index of the first free node in the list
54 * 2) A "next" array to track the index of the next free node in the list
55 *    for every node. So next[head] gives the index of the 2nd free
56 * element in the list.
57 *
58 * So, to begin with, the free list consists of all node indices, and each
59 * position in the next array contains index N + 1:
60 *
61 * head = 0
62 * next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes
63 * free_list = 0->1->2->3->4->-1
64 *
65 * -- Allocations --
66 * 1) Read the current head (aka acq_head)
67 * 2) Read next[acq_head], to get the 2nd free element (aka new_head)
68 * 3) cmp_xchg(&head, acq_head, new_head)
69 * 4) If it succeeds, compute the address of the node, based on
70 * base address, blk_size, & acq_head.
71 *
72 * head = 1;
73 * next = [1, 2, 3, 4, -1] : Example after allocating Node #0
74 * free_list = 1->2->3->4->-1
75 *
76 * head = 2;
77 * next = [1, 2, 3, 4, -1] : Example after allocating Node #1
78 * free_list = 2->3->4->-1
79 *
80 * -- Frees --
81 * 1) Based on the address to be freed, calculate the index of the node
82 * being freed (cur_idx)
83 * 2) Read the current head (old_head)
84 * 3) So the freed node is going to go at the head of the list, and we
85 * want to put the old_head after it. So next[cur_idx] = old_head
86 * 4) cmpxchg(head, old_head, cur_idx)
87 *
88 * head = 0
89 * next = [2, 2, 3, 4, -1]
90 * free_list = 0->2->3->4->-1 : Example after freeing Node #0
91 *
92 * head = 1
93 * next = [2, 0, 3, 4, -1]
94 * free_list = 1->0->2->3->4->-1 : Example after freeing Node #1
95 */
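
The index-based free list described above can be demonstrated with a small standalone C11 sketch (single-threaded, no ABA handling), using compare-and-swap on the head exactly as the steps above describe; the five-node pool matches the example.

/* Standalone sketch of the index-based free list (single-threaded demo). */
#include <stdio.h>
#include <stdatomic.h>

#define NR_NODES 5

static _Atomic int head;
static int next_idx[NR_NODES];

static int alloc_node(void)
{
	int old = atomic_load(&head);

	/* Swing head to next_idx[old]; a failed CAS refreshes 'old'. */
	while (old >= 0 &&
	       !atomic_compare_exchange_weak(&head, &old, next_idx[old]))
		;

	return old;                      /* -1 means the pool is empty */
}

static void free_node(int idx)
{
	int old = atomic_load(&head);

	do {
		next_idx[idx] = old;     /* freed node points at old head */
	} while (!atomic_compare_exchange_weak(&head, &old, idx));
}

int main(void)
{
	int i, a, b;

	for (i = 0; i < NR_NODES; i++)
		next_idx[i] = i + 1;
	next_idx[NR_NODES - 1] = -1;
	atomic_init(&head, 0);

	a = alloc_node();                /* 0 */
	b = alloc_node();                /* 1 */
	free_node(a);                    /* free list: 0->2->3->4->-1 */

	printf("allocated %d and %d, head is now %d\n", a, b,
	       atomic_load(&head));
	return 0;
}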
96
97#ifndef LOCKLESS_ALLOCATOR_PRIV_H
98#define LOCKLESS_ALLOCATOR_PRIV_H
99
100struct nvgpu_allocator;
101
102struct nvgpu_lockless_allocator {
103 struct nvgpu_allocator *owner;
104
105 u64 base; /* Base address of the space. */
106 u64 length; /* Length of the space. */
107 u64 blk_size; /* Size of the structure being allocated */
108 int nr_nodes; /* Number of nodes available for allocation */
109
110 int *next; /* An array holding the next indices per node */
111 int head; /* Current node at the top of the stack */
112
113 u64 flags;
114
115 bool inited;
116
117 /* Statistics */
118 nvgpu_atomic_t nr_allocs;
119};
120
121static inline struct nvgpu_lockless_allocator *lockless_allocator(
122 struct nvgpu_allocator *a)
123{
124 return (struct nvgpu_lockless_allocator *)(a)->priv;
125}
126
127#endif
diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c
new file mode 100644
index 00000000..db87c4c4
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/mm.c
@@ -0,0 +1,450 @@
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a
3 * copy of this software and associated documentation files (the "Software"),
4 * to deal in the Software without restriction, including without limitation
5 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
6 * and/or sell copies of the Software, and to permit persons to whom the
7 * Software is furnished to do so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in
10 * all copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
15 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
17 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
18 * DEALINGS IN THE SOFTWARE.
19 */
20
21#include <nvgpu/mm.h>
22#include <nvgpu/vm.h>
23#include <nvgpu/dma.h>
24#include <nvgpu/vm_area.h>
25#include <nvgpu/gmmu.h>
26#include <nvgpu/vidmem.h>
27#include <nvgpu/semaphore.h>
28#include <nvgpu/pramin.h>
29#include <nvgpu/enabled.h>
30
31#include "gk20a/gk20a.h"
32
33/*
34 * Attempt to find a reserved memory area to determine PTE size for the passed
35 * mapping. If no reserved area can be found use small pages.
36 */
37enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
38 u64 base, u64 size)
39{
40 struct nvgpu_vm_area *vm_area;
41
42 vm_area = nvgpu_vm_area_find(vm, base);
43 if (!vm_area)
44 return gmmu_page_size_small;
45
46 return vm_area->pgsz_idx;
47}
48
49/*
50 * This is for when the GPU does not support unified address spaces.
51 */
52static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
53 u64 base, u64 size)
54{
55 if (!base) {
56 if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
57 return gmmu_page_size_big;
58 return gmmu_page_size_small;
59 } else {
60 if (base < __nv_gmmu_va_small_page_limit())
61 return gmmu_page_size_small;
62 else
63 return gmmu_page_size_big;
64 }
65}
66
67/*
68 * This determines the PTE size for a given alloc. Used by both the GVA space
69 * allocator and the mm core code so that agreement can be reached on how to
70 * map allocations.
71 *
72 * The page size of a buffer is this:
73 *
74 * o If the VM doesn't support large pages then obviously small pages
75 * must be used.
76 * o If the base address is non-zero (fixed address map):
77 * - Attempt to find a reserved memory area and use the page size
78 * based on that.
79 * - If no reserved page size is available, default to small pages.
80 * o If the base is zero:
81 * - If the size is larger than or equal to the big page size, use big
82 * pages.
83 * - Otherwise use small pages.
84 */
85enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
86{
87 struct gk20a *g = gk20a_from_vm(vm);
88
89 if (!vm->big_pages)
90 return gmmu_page_size_small;
91
92 if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
93 return __get_pte_size_split_addr(vm, base, size);
94
95 if (base)
96 return __get_pte_size_fixed_map(vm, base, size);
97
98 if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
99 return gmmu_page_size_big;
100 return gmmu_page_size_small;
101}
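
A standalone sketch of the decision rules listed above, with hypothetical 4 KiB/64 KiB page sizes; the reserved-area lookup for fixed mappings is represented only by a placeholder string, since it needs real VM state.

/* Standalone sketch of the __get_pte_size() decision rules. */
#include <stdio.h>
#include <stdint.h>

#define BIG_PAGE (64ULL * 1024)  /* hypothetical big page size */

static const char *pick_pte_size(int big_pages, uint64_t base, uint64_t size)
{
	if (!big_pages)
		return "small";
	if (base)
		return "reserved-area page size (small if none found)";
	return size >= BIG_PAGE ? "big" : "small";
}

int main(void)
{
	printf("%s\n", pick_pte_size(1, 0, 128 * 1024));  /* big   */
	printf("%s\n", pick_pte_size(1, 0, 4 * 1024));    /* small */
	printf("%s\n", pick_pte_size(0, 0, 1 << 20));     /* small */
	return 0;
}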
102
103int nvgpu_mm_suspend(struct gk20a *g)
104{
105 nvgpu_log_info(g, "MM suspend running...");
106
107 nvgpu_vidmem_thread_pause_sync(&g->mm);
108
109 g->ops.mm.cbc_clean(g);
110 g->ops.mm.l2_flush(g, false);
111
112 nvgpu_log_info(g, "MM suspend done!");
113
114 return 0;
115}
116
117u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
118{
119 if (g->mm.has_physical_mode)
120 return nvgpu_mem_get_phys_addr(g, inst_block);
121 else
122 return nvgpu_mem_get_addr(g, inst_block);
123}
124
125void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
126{
127 if (nvgpu_mem_is_valid(inst_block))
128 nvgpu_dma_free(g, inst_block);
129}
130
131static int nvgpu_alloc_sysmem_flush(struct gk20a *g)
132{
133 return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
134}
135
136static void nvgpu_remove_mm_ce_support(struct mm_gk20a *mm)
137{
138 struct gk20a *g = gk20a_from_mm(mm);
139
140 if (mm->vidmem.ce_ctx_id != (u32)~0)
141 gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);
142
143 mm->vidmem.ce_ctx_id = (u32)~0;
144
145 nvgpu_vm_put(mm->ce.vm);
146}
147
148static void nvgpu_remove_mm_support(struct mm_gk20a *mm)
149{
150 struct gk20a *g = gk20a_from_mm(mm);
151
152 if (g->ops.mm.fault_info_mem_destroy)
153 g->ops.mm.fault_info_mem_destroy(g);
154
155 if (g->ops.mm.remove_bar2_vm)
156 g->ops.mm.remove_bar2_vm(g);
157
158 if (g->ops.mm.is_bar1_supported(g)) {
159 nvgpu_free_inst_block(g, &mm->bar1.inst_block);
160 nvgpu_vm_put(mm->bar1.vm);
161 }
162
163 nvgpu_free_inst_block(g, &mm->pmu.inst_block);
164 nvgpu_free_inst_block(g, &mm->hwpm.inst_block);
165 nvgpu_vm_put(mm->pmu.vm);
166 nvgpu_vm_put(mm->cde.vm);
167
168 nvgpu_semaphore_sea_destroy(g);
169 nvgpu_vidmem_destroy(g);
170 nvgpu_pd_cache_fini(g);
171}
172
173/* PMU VM; shares the channel VM interfaces. */
174static int nvgpu_init_system_vm(struct mm_gk20a *mm)
175{
176 int err;
177 struct gk20a *g = gk20a_from_mm(mm);
178 struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
179 u32 big_page_size = g->ops.mm.get_default_big_page_size();
180 u32 low_hole, aperture_size;
181
182 /*
183 * No user region - so we will pass that as zero sized.
184 */
185 low_hole = SZ_4K * 16;
186 aperture_size = GK20A_PMU_VA_SIZE * 2;
187
188 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
189 nvgpu_log_info(g, "pmu vm size = 0x%x", mm->pmu.aperture_size);
190
191 mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
192 low_hole,
193 aperture_size - low_hole,
194 aperture_size,
195 true,
196 false,
197 "system");
198 if (!mm->pmu.vm)
199 return -ENOMEM;
200
201 err = g->ops.mm.alloc_inst_block(g, inst_block);
202 if (err)
203 goto clean_up_vm;
204 g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);
205
206 return 0;
207
208clean_up_vm:
209 nvgpu_vm_put(mm->pmu.vm);
210 return err;
211}
212
213static int nvgpu_init_hwpm(struct mm_gk20a *mm)
214{
215 int err;
216 struct gk20a *g = gk20a_from_mm(mm);
217 struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;
218
219 err = g->ops.mm.alloc_inst_block(g, inst_block);
220 if (err)
221 return err;
222 g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);
223
224 return 0;
225}
226
227static int nvgpu_init_cde_vm(struct mm_gk20a *mm)
228{
229 struct gk20a *g = gk20a_from_mm(mm);
230 u32 big_page_size = g->ops.mm.get_default_big_page_size();
231
232 mm->cde.vm = nvgpu_vm_init(g, big_page_size,
233 big_page_size << 10,
234 NV_MM_DEFAULT_KERNEL_SIZE,
235 NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
236 false, false, "cde");
237 if (!mm->cde.vm)
238 return -ENOMEM;
239 return 0;
240}
241
242static int nvgpu_init_ce_vm(struct mm_gk20a *mm)
243{
244 struct gk20a *g = gk20a_from_mm(mm);
245 u32 big_page_size = g->ops.mm.get_default_big_page_size();
246
247 mm->ce.vm = nvgpu_vm_init(g, big_page_size,
248 big_page_size << 10,
249 NV_MM_DEFAULT_KERNEL_SIZE,
250 NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
251 false, false, "ce");
252 if (!mm->ce.vm)
253 return -ENOMEM;
254 return 0;
255}
256
257void nvgpu_init_mm_ce_context(struct gk20a *g)
258{
259#if defined(CONFIG_GK20A_VIDMEM)
260 if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) {
261 g->mm.vidmem.ce_ctx_id =
262 gk20a_ce_create_context(g,
263 gk20a_fifo_get_fast_ce_runlist_id(g),
264 -1,
265 -1);
266
267 if (g->mm.vidmem.ce_ctx_id == (u32)~0)
268 nvgpu_err(g,
269 "Failed to allocate CE context for vidmem page clearing support");
270 }
271#endif
272}
273
274static int nvgpu_init_mm_reset_enable_hw(struct gk20a *g)
275{
276 if (g->ops.fb.reset)
277 g->ops.fb.reset(g);
278
279 if (g->ops.clock_gating.slcg_fb_load_gating_prod)
280 g->ops.clock_gating.slcg_fb_load_gating_prod(g,
281 g->slcg_enabled);
282 if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
283 g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
284 g->slcg_enabled);
285 if (g->ops.clock_gating.blcg_fb_load_gating_prod)
286 g->ops.clock_gating.blcg_fb_load_gating_prod(g,
287 g->blcg_enabled);
288 if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
289 g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
290 g->blcg_enabled);
291
292 if (g->ops.fb.init_fs_state)
293 g->ops.fb.init_fs_state(g);
294
295 return 0;
296}
297
298static int nvgpu_init_bar1_vm(struct mm_gk20a *mm)
299{
300 int err;
301 struct gk20a *g = gk20a_from_mm(mm);
302 struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
303 u32 big_page_size = g->ops.mm.get_default_big_page_size();
304
305 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
306 nvgpu_log_info(g, "bar1 vm size = 0x%x", mm->bar1.aperture_size);
307 mm->bar1.vm = nvgpu_vm_init(g,
308 big_page_size,
309 SZ_4K,
310 mm->bar1.aperture_size - SZ_4K,
311 mm->bar1.aperture_size,
312 true, false,
313 "bar1");
314 if (!mm->bar1.vm)
315 return -ENOMEM;
316
317 err = g->ops.mm.alloc_inst_block(g, inst_block);
318 if (err)
319 goto clean_up_vm;
320 g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);
321
322 return 0;
323
324clean_up_vm:
325 nvgpu_vm_put(mm->bar1.vm);
326 return err;
327}
328
329static int nvgpu_init_mm_setup_sw(struct gk20a *g)
330{
331 struct mm_gk20a *mm = &g->mm;
332 int err;
333
334 if (mm->sw_ready) {
335 nvgpu_log_info(g, "skip init");
336 return 0;
337 }
338
339 mm->g = g;
340 nvgpu_mutex_init(&mm->l2_op_lock);
341
342	/* TBD: make channel vm size configurable */
343 mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
344 NV_MM_DEFAULT_KERNEL_SIZE;
345 mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
346
347 nvgpu_log_info(g, "channel vm size: user %dMB kernel %dMB",
348 (int)(mm->channel.user_size >> 20),
349 (int)(mm->channel.kernel_size >> 20));
350
351 nvgpu_init_pramin(mm);
352
353 mm->vidmem.ce_ctx_id = (u32)~0;
354
355 err = nvgpu_vidmem_init(mm);
356 if (err)
357 return err;
358
359 /*
360 * this requires fixed allocations in vidmem which must be
361 * allocated before all other buffers
362 */
363	if (g->ops.pmu.alloc_blob_space &&
364	    !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
365 err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
366 if (err)
367 return err;
368 }
369
370 err = nvgpu_alloc_sysmem_flush(g);
371 if (err)
372 return err;
373
374 if (g->ops.mm.is_bar1_supported(g)) {
375 err = nvgpu_init_bar1_vm(mm);
376 if (err)
377 return err;
378 }
379 if (g->ops.mm.init_bar2_vm) {
380 err = g->ops.mm.init_bar2_vm(g);
381 if (err)
382 return err;
383 }
384 err = nvgpu_init_system_vm(mm);
385 if (err)
386 return err;
387
388 err = nvgpu_init_hwpm(mm);
389 if (err)
390 return err;
391
392 err = nvgpu_init_cde_vm(mm);
393 if (err)
394 return err;
395
396 err = nvgpu_init_ce_vm(mm);
397 if (err)
398 return err;
399
400 mm->remove_support = nvgpu_remove_mm_support;
401 mm->remove_ce_support = nvgpu_remove_mm_ce_support;
402
403 mm->sw_ready = true;
404
405 return 0;
406}
407
408int nvgpu_init_mm_support(struct gk20a *g)
409{
410	int err;
411
412 err = nvgpu_init_mm_reset_enable_hw(g);
413 if (err)
414 return err;
415
416 err = nvgpu_init_mm_setup_sw(g);
417 if (err)
418 return err;
419
420 if (g->ops.mm.init_mm_setup_hw)
421 err = g->ops.mm.init_mm_setup_hw(g);
422
423 return err;
424}
425
426u32 nvgpu_mm_get_default_big_page_size(struct gk20a *g)
427{
428 u32 big_page_size;
429
430 big_page_size = g->ops.mm.get_default_big_page_size();
431
432 if (g->mm.disable_bigpage)
433 big_page_size = 0;
434
435 return big_page_size;
436}
437
438u32 nvgpu_mm_get_available_big_page_sizes(struct gk20a *g)
439{
440 u32 available_big_page_sizes = 0;
441
442 if (!g->mm.disable_bigpage) {
443 available_big_page_sizes =
444 g->ops.mm.get_default_big_page_size();
445 if (g->ops.mm.get_big_page_sizes)
446 available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
447 }
448
449 return available_big_page_sizes;
450}
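/*
 * A minimal, hypothetical helper (not part of the driver) showing how the
 * bitmask returned above can be consumed: every big page size is a power of
 * two, so the sizes double as bit flags and a caller can simply AND against
 * the size it cares about. The helper name is illustrative only.
 */
static inline bool nvgpu_mm_big_page_size_supported(struct gk20a *g, u32 size)
{
	/* A result of 0 from the query means big pages are disabled. */
	return (nvgpu_mm_get_available_big_page_sizes(g) & size) != 0;
}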
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
new file mode 100644
index 00000000..7a4e7705
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
@@ -0,0 +1,162 @@
1/*
2 * gk20a allocator
3 *
4 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25#include <nvgpu/allocator.h>
26
27#include "gk20a/gk20a.h"
28#include "gk20a/mm_gk20a.h"
29
30u64 nvgpu_alloc_length(struct nvgpu_allocator *a)
31{
32 if (a->ops->length)
33 return a->ops->length(a);
34
35 return 0;
36}
37
38u64 nvgpu_alloc_base(struct nvgpu_allocator *a)
39{
40 if (a->ops->base)
41 return a->ops->base(a);
42
43 return 0;
44}
45
46u64 nvgpu_alloc_initialized(struct nvgpu_allocator *a)
47{
48 if (!a->ops || !a->ops->inited)
49 return 0;
50
51 return a->ops->inited(a);
52}
53
54u64 nvgpu_alloc_end(struct nvgpu_allocator *a)
55{
56 if (a->ops->end)
57 return a->ops->end(a);
58
59 return 0;
60}
61
62u64 nvgpu_alloc_space(struct nvgpu_allocator *a)
63{
64 if (a->ops->space)
65 return a->ops->space(a);
66
67 return 0;
68}
69
70u64 nvgpu_alloc(struct nvgpu_allocator *a, u64 len)
71{
72 return a->ops->alloc(a, len);
73}
74
75void nvgpu_free(struct nvgpu_allocator *a, u64 addr)
76{
77 a->ops->free(a, addr);
78}
79
80u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len,
81 u32 page_size)
82{
83 if (a->ops->alloc_fixed)
84 return a->ops->alloc_fixed(a, base, len, page_size);
85
86 return 0;
87}
88
89void nvgpu_free_fixed(struct nvgpu_allocator *a, u64 base, u64 len)
90{
91 /*
92 * If this operation is not defined for the allocator then just do
93 * nothing. The alternative would be to fall back on the regular
94 * free but that may be harmful in unexpected ways.
95 */
96 if (a->ops->free_fixed)
97 a->ops->free_fixed(a, base, len);
98}
99
100int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a,
101 struct nvgpu_alloc_carveout *co)
102{
103 if (a->ops->reserve_carveout)
104 return a->ops->reserve_carveout(a, co);
105
106 return -ENODEV;
107}
108
109void nvgpu_alloc_release_carveout(struct nvgpu_allocator *a,
110 struct nvgpu_alloc_carveout *co)
111{
112 if (a->ops->release_carveout)
113 a->ops->release_carveout(a, co);
114}
115
116void nvgpu_alloc_destroy(struct nvgpu_allocator *a)
117{
118 a->ops->fini(a);
119 nvgpu_mutex_destroy(&a->lock);
120 memset(a, 0, sizeof(*a));
121}
122
123#ifdef __KERNEL__
124void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a,
125 struct seq_file *s, int lock)
126{
127 __a->ops->print_stats(__a, s, lock);
128}
129#endif
130
131/*
132 * Handle the common init stuff for a nvgpu_allocator.
133 */
134int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g,
135 const char *name, void *priv, bool dbg,
136 const struct nvgpu_allocator_ops *ops)
137{
138 int err;
139
140 if (!ops)
141 return -EINVAL;
142
143 /*
144 * This is the bare minimum operations required for a sensible
145 * allocator.
146 */
147 if (!ops->alloc || !ops->free || !ops->fini)
148 return -EINVAL;
149
150 err = nvgpu_mutex_init(&a->lock);
151 if (err)
152 return err;
153
154 a->g = g;
155 a->ops = ops;
156 a->priv = priv;
157 a->debug = dbg;
158
159 strlcpy(a->name, name, sizeof(a->name));
160
161 return 0;
162}
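/*
 * A minimal usage sketch (not taken from the driver) for the facade above:
 * once a backend has been set up through __nvgpu_alloc_common_init(), callers
 * only ever go through the nvgpu_alloc()/nvgpu_free() wrappers and never poke
 * the ops table directly. A return of 0 is treated as failure, matching how
 * the wrappers above report a missing or failed operation.
 */
static int example_alloc_and_free(struct nvgpu_allocator *a)
{
	u64 addr = nvgpu_alloc(a, SZ_4K);

	if (!addr)
		return -ENOMEM;

	nvgpu_free(a, addr);
	return 0;
}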
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
new file mode 100644
index 00000000..b4e718b4
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -0,0 +1,119 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/kmem.h>
24#include <nvgpu/nvgpu_mem.h>
25#include <nvgpu/dma.h>
26#include <nvgpu/vidmem.h>
27
28#include "gk20a/gk20a.h"
29
30void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
31{
32 return sgt->ops->sgl_next(sgl);
33}
34
35u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl)
36{
37 return sgt->ops->sgl_phys(sgl);
38}
39
40u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl)
41{
42 return sgt->ops->sgl_dma(sgl);
43}
44
45u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl)
46{
47 return sgt->ops->sgl_length(sgl);
48}
49
50u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl,
51 struct nvgpu_gmmu_attrs *attrs)
52{
53 return sgt->ops->sgl_gpu_addr(g, sgl, attrs);
54}
55
56bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt)
57{
58 if (sgt->ops->sgt_iommuable)
59 return sgt->ops->sgt_iommuable(g, sgt);
60 return false;
61}
62
63void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
64{
65 if (sgt && sgt->ops->sgt_free)
66 sgt->ops->sgt_free(g, sgt);
67}
68
69u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys)
70{
71 /* ensure it is not vidmem allocation */
72 WARN_ON(nvgpu_addr_is_vidmem_page_alloc(phys));
73
74 if (nvgpu_iommuable(g) && g->ops.mm.get_iommu_bit)
75 return phys | 1ULL << g->ops.mm.get_iommu_bit(g);
76
77 return phys;
78}
79
80/*
81 * Determine alignment for a passed buffer. Necessary since the buffer may
82 * appear big enough to map with large pages but the SGL may have chunks that
83 * are not aligned on a 64/128kB large page boundary. There's also the
84 * possibility that chunks are odd sizes, which will necessitate small page
85 * mappings to correctly glue them together into a contiguous virtual mapping.
86 */
87u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt)
88{
89 u64 align = 0, chunk_align = 0;
90 void *sgl;
91
92 /*
93 * If this SGT is iommuable and we want to use the IOMMU address then
94 * the SGT's first entry has the IOMMU address. We will align on this
95 * and double check length of buffer later. Also, since there's an
96 * IOMMU we know that this DMA address is contiguous.
97 */
98 if (!g->mm.bypass_smmu &&
99 nvgpu_sgt_iommuable(g, sgt) &&
100 nvgpu_sgt_get_dma(sgt, sgt->sgl))
101 return 1ULL << __ffs(nvgpu_sgt_get_dma(sgt, sgt->sgl));
102
103 /*
104 * Otherwise the buffer is not iommuable (VIDMEM, for example) or we are
105 * bypassing the IOMMU and need to use the underlying physical entries
106 * of the SGT.
107 */
108 nvgpu_sgt_for_each_sgl(sgl, sgt) {
109 chunk_align = 1ULL << __ffs(nvgpu_sgt_get_phys(sgt, sgl) |
110 nvgpu_sgt_get_length(sgt, sgl));
111
112 if (align)
113 align = min(align, chunk_align);
114 else
115 align = chunk_align;
116 }
117
118 return align;
119}
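/*
 * A worked example of the per-chunk alignment math above (values are
 * illustrative only): for a chunk with phys = 0x10012000 and length = 0x6000,
 *
 *   0x10012000 | 0x6000 = 0x10016000
 *   __ffs(0x10016000)   = 13  ->  chunk_align = 1ULL << 13 = 0x2000 (8kB)
 *
 * so any buffer containing that chunk can be mapped with at most 8kB
 * alignment, ruling out 64/128kB big pages even if the total size is large.
 */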
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
new file mode 100644
index 00000000..d5ce5d8e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -0,0 +1,1047 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/bitops.h>
24#include <nvgpu/allocator.h>
25#include <nvgpu/page_allocator.h>
26#include <nvgpu/kmem.h>
27#include <nvgpu/bug.h>
28#include <nvgpu/log2.h>
29
30#include "buddy_allocator_priv.h"
31
32#define palloc_dbg(a, fmt, arg...) \
33 alloc_dbg(palloc_owner(a), fmt, ##arg)
34
35/*
36 * Since some Linux headers are still leaked into common code this is necessary
37 * for some builds.
38 */
39#ifdef PAGE_SIZE
40#undef PAGE_SIZE
41#endif
42
43#ifdef PAGE_ALIGN
44#undef PAGE_ALIGN
45#endif
46
47/*
48 * VIDMEM page size is 4k.
49 */
50#define PAGE_SIZE 0x1000
51#define PAGE_ALIGN(addr) ((addr + (PAGE_SIZE - 1)) & \
52 ((typeof(addr)) ~(PAGE_SIZE - 1)))
53
54/*
55 * Handle the book-keeping for these operations.
56 */
57static inline void add_slab_page_to_empty(struct page_alloc_slab *slab,
58 struct page_alloc_slab_page *page)
59{
60 BUG_ON(page->state != SP_NONE);
61 nvgpu_list_add(&page->list_entry, &slab->empty);
62 slab->nr_empty++;
63 page->state = SP_EMPTY;
64}
65static inline void add_slab_page_to_partial(struct page_alloc_slab *slab,
66 struct page_alloc_slab_page *page)
67{
68 BUG_ON(page->state != SP_NONE);
69 nvgpu_list_add(&page->list_entry, &slab->partial);
70 slab->nr_partial++;
71 page->state = SP_PARTIAL;
72}
73static inline void add_slab_page_to_full(struct page_alloc_slab *slab,
74 struct page_alloc_slab_page *page)
75{
76 BUG_ON(page->state != SP_NONE);
77 nvgpu_list_add(&page->list_entry, &slab->full);
78 slab->nr_full++;
79 page->state = SP_FULL;
80}
81
82static inline void del_slab_page_from_empty(struct page_alloc_slab *slab,
83 struct page_alloc_slab_page *page)
84{
85 nvgpu_list_del(&page->list_entry);
86 slab->nr_empty--;
87 page->state = SP_NONE;
88}
89static inline void del_slab_page_from_partial(struct page_alloc_slab *slab,
90 struct page_alloc_slab_page *page)
91{
92 nvgpu_list_del(&page->list_entry);
93 slab->nr_partial--;
94 page->state = SP_NONE;
95}
96static inline void del_slab_page_from_full(struct page_alloc_slab *slab,
97 struct page_alloc_slab_page *page)
98{
99 nvgpu_list_del(&page->list_entry);
100 slab->nr_full--;
101 page->state = SP_NONE;
102}
103
104static u64 nvgpu_page_alloc_length(struct nvgpu_allocator *a)
105{
106 struct nvgpu_page_allocator *va = a->priv;
107
108 return nvgpu_alloc_length(&va->source_allocator);
109}
110
111static u64 nvgpu_page_alloc_base(struct nvgpu_allocator *a)
112{
113 struct nvgpu_page_allocator *va = a->priv;
114
115 return nvgpu_alloc_base(&va->source_allocator);
116}
117
118static int nvgpu_page_alloc_inited(struct nvgpu_allocator *a)
119{
120 struct nvgpu_page_allocator *va = a->priv;
121
122 return nvgpu_alloc_initialized(&va->source_allocator);
123}
124
125static u64 nvgpu_page_alloc_end(struct nvgpu_allocator *a)
126{
127 struct nvgpu_page_allocator *va = a->priv;
128
129 return nvgpu_alloc_end(&va->source_allocator);
130}
131
132static u64 nvgpu_page_alloc_space(struct nvgpu_allocator *a)
133{
134 struct nvgpu_page_allocator *va = a->priv;
135
136 return nvgpu_alloc_space(&va->source_allocator);
137}
138
139static int nvgpu_page_reserve_co(struct nvgpu_allocator *a,
140 struct nvgpu_alloc_carveout *co)
141{
142 struct nvgpu_page_allocator *va = a->priv;
143
144 return nvgpu_alloc_reserve_carveout(&va->source_allocator, co);
145}
146
147static void nvgpu_page_release_co(struct nvgpu_allocator *a,
148 struct nvgpu_alloc_carveout *co)
149{
150 struct nvgpu_page_allocator *va = a->priv;
151
152 nvgpu_alloc_release_carveout(&va->source_allocator, co);
153}
154
155static void *nvgpu_page_alloc_sgl_next(void *sgl)
156{
157 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
158
159 return nvgpu_sgl->next;
160}
161
162static u64 nvgpu_page_alloc_sgl_phys(void *sgl)
163{
164 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
165
166 return nvgpu_sgl->phys;
167}
168
169static u64 nvgpu_page_alloc_sgl_dma(void *sgl)
170{
171 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
172
173 return nvgpu_sgl->dma;
174}
175
176static u64 nvgpu_page_alloc_sgl_length(void *sgl)
177{
178 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
179
180 return nvgpu_sgl->length;
181}
182
183static u64 nvgpu_page_alloc_sgl_gpu_addr(struct gk20a *g, void *sgl,
184 struct nvgpu_gmmu_attrs *attrs)
185{
186 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
187
188 return nvgpu_sgl->phys;
189}
190
191static void nvgpu_page_alloc_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
192{
193 /*
194 * No-op here. The free is handled by the page_alloc free() functions.
195 */
196}
197
198/*
199 * These implement the generic scatter-gather ops for pages allocated
200 * by the page allocator. However, the primary target for this is, of course,
201 * vidmem.
202 */
203static const struct nvgpu_sgt_ops page_alloc_sgl_ops = {
204 .sgl_next = nvgpu_page_alloc_sgl_next,
205 .sgl_phys = nvgpu_page_alloc_sgl_phys,
206 .sgl_dma = nvgpu_page_alloc_sgl_dma,
207 .sgl_length = nvgpu_page_alloc_sgl_length,
208 .sgl_gpu_addr = nvgpu_page_alloc_sgl_gpu_addr,
209 .sgt_free = nvgpu_page_alloc_sgt_free,
210};
211
212/*
213 * This actually frees the sgl memory. Used by the page_alloc free() functions.
214 */
215static void nvgpu_page_alloc_sgl_proper_free(struct gk20a *g,
216 struct nvgpu_mem_sgl *sgl)
217{
218 struct nvgpu_mem_sgl *next;
219
220 while (sgl) {
221 next = sgl->next;
222 nvgpu_kfree(g, sgl);
223 sgl = next;
224 }
225}
226
227static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
228 struct nvgpu_page_alloc *alloc,
229 bool free_buddy_alloc)
230{
231 struct nvgpu_mem_sgl *sgl = alloc->sgt.sgl;
232
233 if (free_buddy_alloc) {
234 while (sgl) {
235 nvgpu_free(&a->source_allocator,
236 nvgpu_sgt_get_phys(&alloc->sgt, sgl));
237 sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
238 }
239 }
240
241	nvgpu_page_alloc_sgl_proper_free(a->owner->g, alloc->sgt.sgl);
242 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
243}
244
245static int __insert_page_alloc(struct nvgpu_page_allocator *a,
246 struct nvgpu_page_alloc *alloc)
247{
248 alloc->tree_entry.key_start = alloc->base;
249 alloc->tree_entry.key_end = alloc->base + alloc->length;
250
251 nvgpu_rbtree_insert(&alloc->tree_entry, &a->allocs);
252 return 0;
253}
254
255static struct nvgpu_page_alloc *__find_page_alloc(
256 struct nvgpu_page_allocator *a,
257 u64 addr)
258{
259 struct nvgpu_page_alloc *alloc;
260 struct nvgpu_rbtree_node *node = NULL;
261
262 nvgpu_rbtree_search(addr, &node, a->allocs);
263 if (!node)
264 return NULL;
265
266 alloc = nvgpu_page_alloc_from_rbtree_node(node);
267
268 nvgpu_rbtree_unlink(node, &a->allocs);
269
270 return alloc;
271}
272
273static struct page_alloc_slab_page *alloc_slab_page(
274 struct nvgpu_page_allocator *a,
275 struct page_alloc_slab *slab)
276{
277 struct page_alloc_slab_page *slab_page;
278
279 slab_page = nvgpu_kmem_cache_alloc(a->slab_page_cache);
280 if (!slab_page) {
281 palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n");
282 return NULL;
283 }
284
285 memset(slab_page, 0, sizeof(*slab_page));
286
287 slab_page->page_addr = nvgpu_alloc(&a->source_allocator, a->page_size);
288 if (!slab_page->page_addr) {
289 nvgpu_kmem_cache_free(a->slab_page_cache, slab_page);
290 palloc_dbg(a, "OOM: vidmem is full!\n");
291 return NULL;
292 }
293
294 nvgpu_init_list_node(&slab_page->list_entry);
295 slab_page->slab_size = slab->slab_size;
296 slab_page->nr_objects = (u32)a->page_size / slab->slab_size;
297 slab_page->nr_objects_alloced = 0;
298 slab_page->owner = slab;
299 slab_page->state = SP_NONE;
300
301 a->pages_alloced++;
302
303 palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n",
304 slab_page->page_addr, slab_page->slab_size);
305
306 return slab_page;
307}
308
309static void free_slab_page(struct nvgpu_page_allocator *a,
310 struct page_alloc_slab_page *slab_page)
311{
312 palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr);
313
314 BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) ||
315 slab_page->nr_objects_alloced != 0 ||
316 slab_page->bitmap != 0);
317
318 nvgpu_free(&a->source_allocator, slab_page->page_addr);
319 a->pages_freed++;
320
321 nvgpu_kmem_cache_free(a->slab_page_cache, slab_page);
322}
323
324/*
325 * This expects @alloc to have 1 empty sgl_entry ready for usage.
326 */
327static int __do_slab_alloc(struct nvgpu_page_allocator *a,
328 struct page_alloc_slab *slab,
329 struct nvgpu_page_alloc *alloc)
330{
331 struct page_alloc_slab_page *slab_page = NULL;
332 struct nvgpu_mem_sgl *sgl;
333 unsigned long offs;
334
335 /*
336 * Check the partial and empty lists to see if we have some space
337	 * readily available. Take the slab_page out of whatever list it
338 * was in since it may be put back into a different list later.
339 */
340 if (!nvgpu_list_empty(&slab->partial)) {
341 slab_page = nvgpu_list_first_entry(&slab->partial,
342 page_alloc_slab_page,
343 list_entry);
344 del_slab_page_from_partial(slab, slab_page);
345 } else if (!nvgpu_list_empty(&slab->empty)) {
346 slab_page = nvgpu_list_first_entry(&slab->empty,
347 page_alloc_slab_page,
348 list_entry);
349 del_slab_page_from_empty(slab, slab_page);
350 }
351
352 if (!slab_page) {
353 slab_page = alloc_slab_page(a, slab);
354 if (!slab_page)
355 return -ENOMEM;
356 }
357
358 /*
359 * We now have a slab_page. Do the alloc.
360 */
361 offs = bitmap_find_next_zero_area(&slab_page->bitmap,
362 slab_page->nr_objects,
363 0, 1, 0);
364 if (offs >= slab_page->nr_objects) {
365 WARN(1, "Empty/partial slab with no free objects?");
366
367 /* Add the buggy page to the full list... This isn't ideal. */
368 add_slab_page_to_full(slab, slab_page);
369 return -ENOMEM;
370 }
371
372 bitmap_set(&slab_page->bitmap, offs, 1);
373 slab_page->nr_objects_alloced++;
374
375 if (slab_page->nr_objects_alloced < slab_page->nr_objects)
376 add_slab_page_to_partial(slab, slab_page);
377 else if (slab_page->nr_objects_alloced == slab_page->nr_objects)
378 add_slab_page_to_full(slab, slab_page);
379 else
380 BUG(); /* Should be impossible to hit this. */
381
382 /*
383 * Handle building the nvgpu_page_alloc struct. We expect one sgl
384 * to be present.
385 */
386 alloc->slab_page = slab_page;
387 alloc->nr_chunks = 1;
388 alloc->length = slab_page->slab_size;
389 alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
390
391 sgl = alloc->sgt.sgl;
392 sgl->phys = alloc->base;
393 sgl->dma = alloc->base;
394 sgl->length = alloc->length;
395 sgl->next = NULL;
396
397 return 0;
398}
399
400/*
401 * Allocate from a slab instead of directly from the page allocator.
402 */
403static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
404 struct nvgpu_page_allocator *a, u64 len)
405{
406 int err, slab_nr;
407 struct page_alloc_slab *slab;
408 struct nvgpu_page_alloc *alloc = NULL;
409 struct nvgpu_mem_sgl *sgl = NULL;
410
411 /*
412 * Align the length to a page and then divide by the page size (4k for
413 * this code). ilog2() of that then gets us the correct slab to use.
414 */
415 slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);
416 slab = &a->slabs[slab_nr];
417
418 alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
419 if (!alloc) {
420 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
421 goto fail;
422 }
423
424 alloc->sgt.ops = &page_alloc_sgl_ops;
425
426 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
427 if (!sgl) {
428 palloc_dbg(a, "OOM: could not alloc sgl struct!\n");
429 goto fail;
430 }
431
432 alloc->sgt.sgl = sgl;
433 err = __do_slab_alloc(a, slab, alloc);
434 if (err)
435 goto fail;
436
437 palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n",
438 len, slab_nr, alloc->base);
439 a->nr_slab_allocs++;
440
441 return alloc;
442
443fail:
444 if (alloc)
445 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
446 if (sgl)
447 nvgpu_kfree(a->owner->g, sgl);
448 return NULL;
449}
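/*
 * A worked example of the slab selection above (numbers are illustrative):
 * with a 64k allocator page size and a request of len = 0x1800 (6kB),
 *
 *   PAGE_ALIGN(0x1800) = 0x2000 (two 4k pages)
 *   slab_nr = ilog2(0x2000 >> 12) = ilog2(2) = 1
 *
 * which picks the 8kB bucket, i.e. slab_size = SZ_4K * (1 << 1).
 */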
450
451static void __nvgpu_free_slab(struct nvgpu_page_allocator *a,
452 struct nvgpu_page_alloc *alloc)
453{
454 struct page_alloc_slab_page *slab_page = alloc->slab_page;
455 struct page_alloc_slab *slab = slab_page->owner;
456 enum slab_page_state new_state;
457 int offs;
458
459 offs = (u32)(alloc->base - slab_page->page_addr) / slab_page->slab_size;
460 bitmap_clear(&slab_page->bitmap, offs, 1);
461
462 slab_page->nr_objects_alloced--;
463
464 if (slab_page->nr_objects_alloced == 0)
465 new_state = SP_EMPTY;
466 else
467 new_state = SP_PARTIAL;
468
469 /*
470 * Need to migrate the page to a different list.
471 */
472 if (new_state != slab_page->state) {
473 /* Delete - can't be in empty. */
474 if (slab_page->state == SP_PARTIAL)
475 del_slab_page_from_partial(slab, slab_page);
476 else
477 del_slab_page_from_full(slab, slab_page);
478
479 /* And add. */
480 if (new_state == SP_EMPTY) {
481 if (nvgpu_list_empty(&slab->empty))
482 add_slab_page_to_empty(slab, slab_page);
483 else
484 free_slab_page(a, slab_page);
485 } else {
486 add_slab_page_to_partial(slab, slab_page);
487 }
488 }
489
490 /*
491 * Now handle the page_alloc.
492 */
493 __nvgpu_free_pages(a, alloc, false);
494 a->nr_slab_frees++;
495
496 return;
497}
498
499/*
500 * Allocate physical pages. Since the underlying allocator is a buddy allocator,
501 * each chunk handed back is itself contiguous. However, if the space is
502 * fragmented, this allocator will stitch the requested size together from
503 * several smaller, non-contiguous chunks when necessary.
504 */
505static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
506 struct nvgpu_page_allocator *a, u64 pages)
507{
508 struct nvgpu_page_alloc *alloc;
509 struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL;
510 u64 max_chunk_len = pages << a->page_shift;
511 int i = 0;
512
513 alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
514 if (!alloc)
515 goto fail;
516
517 memset(alloc, 0, sizeof(*alloc));
518
519 alloc->length = pages << a->page_shift;
520 alloc->sgt.ops = &page_alloc_sgl_ops;
521
522 while (pages) {
523 u64 chunk_addr = 0;
524 u64 chunk_pages = (u64)1 << __fls(pages);
525 u64 chunk_len = chunk_pages << a->page_shift;
526
527 /*
528 * Take care of the possibility that the allocation must be
529 * contiguous. If this is not the first iteration then that
530 * means the first iteration failed to alloc the entire
531 * requested size. The buddy allocator guarantees any given
532 * single alloc is contiguous.
533 */
534 if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0)
535 goto fail_cleanup;
536
537 if (chunk_len > max_chunk_len)
538 chunk_len = max_chunk_len;
539
540 /*
541 * Keep attempting to allocate in smaller chunks until the alloc
542 * either succeeds or is smaller than the page_size of the
543		 * allocator (i.e. the allocator is OOM).
544 */
545 do {
546 chunk_addr = nvgpu_alloc(&a->source_allocator,
547 chunk_len);
548
549 /* Divide by 2 and try again */
550 if (!chunk_addr) {
551 palloc_dbg(a, "balloc failed: 0x%llx\n",
552 chunk_len);
553 chunk_len >>= 1;
554 max_chunk_len = chunk_len;
555 }
556 } while (!chunk_addr && chunk_len >= a->page_size);
557
558 chunk_pages = chunk_len >> a->page_shift;
559
560 if (!chunk_addr) {
561 palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len);
562 goto fail_cleanup;
563 }
564
565 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
566 if (!sgl) {
567 nvgpu_free(&a->source_allocator, chunk_addr);
568 goto fail_cleanup;
569 }
570
571 pages -= chunk_pages;
572
573 sgl->phys = chunk_addr;
574 sgl->dma = chunk_addr;
575 sgl->length = chunk_len;
576
577 /*
578 * Build the singly linked list with a head node that is part of
579 * the list.
580 */
581 if (prev_sgl)
582 prev_sgl->next = sgl;
583 else
584 alloc->sgt.sgl = sgl;
585
586 prev_sgl = sgl;
587
588 i++;
589 }
590
591 alloc->nr_chunks = i;
592 alloc->base = ((struct nvgpu_mem_sgl *)alloc->sgt.sgl)->phys;
593
594 return alloc;
595
596fail_cleanup:
597 sgl = alloc->sgt.sgl;
598 while (sgl) {
599 struct nvgpu_mem_sgl *next = sgl->next;
600
601 nvgpu_free(&a->source_allocator, sgl->phys);
602 nvgpu_kfree(a->owner->g, sgl);
603
604 sgl = next;
605 }
606
607 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
608fail:
609 return NULL;
610}
611
612static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
613 struct nvgpu_page_allocator *a, u64 len)
614{
615 struct nvgpu_page_alloc *alloc = NULL;
616 struct nvgpu_mem_sgl *sgl;
617 u64 pages;
618 int i = 0;
619
620 pages = ALIGN(len, a->page_size) >> a->page_shift;
621
622 alloc = __do_nvgpu_alloc_pages(a, pages);
623 if (!alloc) {
624 palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
625 pages << a->page_shift, pages);
626 return NULL;
627 }
628
629 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
630 pages << a->page_shift, pages, alloc->base);
631 sgl = alloc->sgt.sgl;
632 while (sgl) {
633 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
634 i++,
635 nvgpu_sgt_get_phys(&alloc->sgt, sgl),
636 nvgpu_sgt_get_length(&alloc->sgt, sgl));
637 sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
638 }
639 palloc_dbg(a, "Alloc done\n");
640
641 return alloc;
642}
643
644/*
645 * Allocate enough pages to satisfy @len. Page size is determined at
646 * initialization of the allocator.
647 *
648 * The returned u64 is actually a pointer to a struct nvgpu_page_alloc (unless
649 * GPU_ALLOC_NO_SCATTER_GATHER is set, in which case it is the base address).
650 * This is because it doesn't make a lot of sense to return the address of the
651 * first page in the list of pages, since they could be discontiguous. This has
652 * precedent in the dma_alloc APIs; it's an artifact of nvgpu_alloc() returning a u64.
653 */
654static u64 nvgpu_page_alloc(struct nvgpu_allocator *__a, u64 len)
655{
656 struct nvgpu_page_allocator *a = page_allocator(__a);
657 struct nvgpu_page_alloc *alloc = NULL;
658 u64 real_len;
659
660 /*
661 * If we want contig pages we have to round up to a power of two. It's
662 * easier to do that here than in the buddy allocator.
663 */
664 real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
665 roundup_pow_of_two(len) : len;
666
667 alloc_lock(__a);
668 if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES &&
669 real_len <= (a->page_size / 2))
670 alloc = __nvgpu_alloc_slab(a, real_len);
671 else
672 alloc = __nvgpu_alloc_pages(a, real_len);
673
674 if (!alloc) {
675 alloc_unlock(__a);
676 return 0;
677 }
678
679 __insert_page_alloc(a, alloc);
680
681 a->nr_allocs++;
682 if (real_len > a->page_size / 2)
683 a->pages_alloced += alloc->length >> a->page_shift;
684 alloc_unlock(__a);
685
686 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
687 return alloc->base;
688 else
689 return (u64) (uintptr_t) alloc;
690}
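/*
 * A minimal sketch (not from the driver) of how a caller interprets the u64
 * returned by nvgpu_page_alloc() above: unless GPU_ALLOC_NO_SCATTER_GATHER is
 * set, the value is really a pointer to the nvgpu_page_alloc tracking struct,
 * whose sgt can then be walked with the generic nvgpu_sgt helpers.
 */
static void example_walk_page_alloc(u64 handle)
{
	struct nvgpu_page_alloc *alloc =
		(struct nvgpu_page_alloc *)(uintptr_t)handle;
	void *sgl;

	nvgpu_sgt_for_each_sgl(sgl, &alloc->sgt) {
		/* Each chunk is physically contiguous on its own. */
		(void)nvgpu_sgt_get_phys(&alloc->sgt, sgl);
		(void)nvgpu_sgt_get_length(&alloc->sgt, sgl);
	}
}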
691
692/*
693 * Note: this will remove the nvgpu_page_alloc struct from the RB tree
694 * if it's found.
695 */
696static void nvgpu_page_free(struct nvgpu_allocator *__a, u64 base)
697{
698 struct nvgpu_page_allocator *a = page_allocator(__a);
699 struct nvgpu_page_alloc *alloc;
700
701 alloc_lock(__a);
702
703 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
704 alloc = __find_page_alloc(a, base);
705 else
706 alloc = __find_page_alloc(a,
707 ((struct nvgpu_page_alloc *)(uintptr_t)base)->base);
708
709 if (!alloc) {
710 palloc_dbg(a, "Hrm, found no alloc?\n");
711 goto done;
712 }
713
714 a->nr_frees++;
715
716 palloc_dbg(a, "Free 0x%llx id=0x%010llx\n",
717 alloc->length, alloc->base);
718
719 /*
720 * Frees *alloc.
721 */
722 if (alloc->slab_page) {
723 __nvgpu_free_slab(a, alloc);
724 } else {
725 a->pages_freed += (alloc->length >> a->page_shift);
726 __nvgpu_free_pages(a, alloc, true);
727 }
728
729done:
730 alloc_unlock(__a);
731}
732
733static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
734 struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
735{
736 struct nvgpu_page_alloc *alloc;
737 struct nvgpu_mem_sgl *sgl;
738
739 alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
740 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
741 if (!alloc || !sgl)
742 goto fail;
743
744 alloc->sgt.ops = &page_alloc_sgl_ops;
745 alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
746 if (!alloc->base) {
747		WARN(1, "nvgpu: failed fixed alloc of pages @ 0x%010llx", base);
748 goto fail;
749 }
750
751 alloc->nr_chunks = 1;
752 alloc->length = length;
753 alloc->sgt.sgl = sgl;
754
755 sgl->phys = alloc->base;
756 sgl->dma = alloc->base;
757 sgl->length = length;
758 sgl->next = NULL;
759
760 return alloc;
761
762fail:
763 if (sgl)
764 nvgpu_kfree(a->owner->g, sgl);
765 if (alloc)
766 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
767 return NULL;
768}
769
770/*
771 * @page_size is ignored.
772 */
773static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
774 u64 base, u64 len, u32 page_size)
775{
776 struct nvgpu_page_allocator *a = page_allocator(__a);
777 struct nvgpu_page_alloc *alloc = NULL;
778 struct nvgpu_mem_sgl *sgl;
779 u64 aligned_len, pages;
780 int i = 0;
781
782 aligned_len = ALIGN(len, a->page_size);
783 pages = aligned_len >> a->page_shift;
784
785 alloc_lock(__a);
786
787 alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len, 0);
788 if (!alloc) {
789 alloc_unlock(__a);
790 return 0;
791 }
792
793 __insert_page_alloc(a, alloc);
794 alloc_unlock(__a);
795
796 palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
797 alloc->base, aligned_len, pages);
798 sgl = alloc->sgt.sgl;
799 while (sgl) {
800 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
801 i++,
802 nvgpu_sgt_get_phys(&alloc->sgt, sgl),
803 nvgpu_sgt_get_length(&alloc->sgt, sgl));
804 sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
805 }
806
807 a->nr_fixed_allocs++;
808 a->pages_alloced += pages;
809
810 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
811 return alloc->base;
812 else
813 return (u64) (uintptr_t) alloc;
814}
815
816static void nvgpu_page_free_fixed(struct nvgpu_allocator *__a,
817 u64 base, u64 len)
818{
819 struct nvgpu_page_allocator *a = page_allocator(__a);
820 struct nvgpu_page_alloc *alloc;
821
822 alloc_lock(__a);
823
824 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
825 alloc = __find_page_alloc(a, base);
826 if (!alloc)
827 goto done;
828 } else {
829 alloc = (struct nvgpu_page_alloc *) (uintptr_t) base;
830 }
831
832 palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
833 alloc->base, alloc->length);
834
835 a->nr_fixed_frees++;
836 a->pages_freed += (alloc->length >> a->page_shift);
837
838 /*
839 * This works for the time being since the buddy allocator
840 * uses the same free function for both fixed and regular
841 * allocs. This would have to be updated if the underlying
842 * allocator were to change.
843 */
844 __nvgpu_free_pages(a, alloc, true);
845
846done:
847 alloc_unlock(__a);
848}
849
850static void nvgpu_page_allocator_destroy(struct nvgpu_allocator *__a)
851{
852 struct nvgpu_page_allocator *a = page_allocator(__a);
853
854 alloc_lock(__a);
855 nvgpu_kfree(nvgpu_alloc_to_gpu(__a), a);
856 __a->priv = NULL;
857 alloc_unlock(__a);
858}
859
860#ifdef __KERNEL__
861static void nvgpu_page_print_stats(struct nvgpu_allocator *__a,
862 struct seq_file *s, int lock)
863{
864 struct nvgpu_page_allocator *a = page_allocator(__a);
865 int i;
866
867 if (lock)
868 alloc_lock(__a);
869
870 __alloc_pstat(s, __a, "Page allocator:\n");
871 __alloc_pstat(s, __a, " allocs %lld\n", a->nr_allocs);
872 __alloc_pstat(s, __a, " frees %lld\n", a->nr_frees);
873 __alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs);
874 __alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees);
875 __alloc_pstat(s, __a, " slab_allocs %lld\n", a->nr_slab_allocs);
876 __alloc_pstat(s, __a, " slab_frees %lld\n", a->nr_slab_frees);
877 __alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced);
878 __alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed);
879 __alloc_pstat(s, __a, "\n");
880
881 __alloc_pstat(s, __a, "Page size: %lld KB\n",
882 a->page_size >> 10);
883 __alloc_pstat(s, __a, "Total pages: %lld (%lld MB)\n",
884 a->length / a->page_size,
885 a->length >> 20);
886 __alloc_pstat(s, __a, "Available pages: %lld (%lld MB)\n",
887 nvgpu_alloc_space(&a->source_allocator) / a->page_size,
888 nvgpu_alloc_space(&a->source_allocator) >> 20);
889 __alloc_pstat(s, __a, "\n");
890
891 /*
892 * Slab info.
893 */
894 if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
895 __alloc_pstat(s, __a, "Slabs:\n");
896 __alloc_pstat(s, __a, " size empty partial full\n");
897 __alloc_pstat(s, __a, " ---- ----- ------- ----\n");
898
899 for (i = 0; i < a->nr_slabs; i++) {
900 struct page_alloc_slab *slab = &a->slabs[i];
901
902 __alloc_pstat(s, __a, " %-9u %-9d %-9u %u\n",
903 slab->slab_size,
904 slab->nr_empty, slab->nr_partial,
905 slab->nr_full);
906 }
907 __alloc_pstat(s, __a, "\n");
908 }
909
910 __alloc_pstat(s, __a, "Source alloc: %s\n",
911 a->source_allocator.name);
912 nvgpu_alloc_print_stats(&a->source_allocator, s, lock);
913
914 if (lock)
915 alloc_unlock(__a);
916}
917#endif
918
919static const struct nvgpu_allocator_ops page_ops = {
920 .alloc = nvgpu_page_alloc,
921 .free = nvgpu_page_free,
922
923 .alloc_fixed = nvgpu_page_alloc_fixed,
924 .free_fixed = nvgpu_page_free_fixed,
925
926 .reserve_carveout = nvgpu_page_reserve_co,
927 .release_carveout = nvgpu_page_release_co,
928
929 .base = nvgpu_page_alloc_base,
930 .length = nvgpu_page_alloc_length,
931 .end = nvgpu_page_alloc_end,
932 .inited = nvgpu_page_alloc_inited,
933 .space = nvgpu_page_alloc_space,
934
935 .fini = nvgpu_page_allocator_destroy,
936
937#ifdef __KERNEL__
938 .print_stats = nvgpu_page_print_stats,
939#endif
940};
941
942/*
943 * nr_slabs is computed as follows: divide page_size by 4096 to get number of
944 * 4k pages in page_size. Then take the base 2 log of that to get number of
945 * slabs. For a 64k page_size that works out like:
946 *
947 *   (1024*64) / (1024*4) = 16
948 *   ilog2(16) = 4
949 *
950 * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
951 */
952static int nvgpu_page_alloc_init_slabs(struct nvgpu_page_allocator *a)
953{
954 size_t nr_slabs = ilog2(a->page_size >> 12);
955 unsigned int i;
956
957 a->slabs = nvgpu_kcalloc(nvgpu_alloc_to_gpu(a->owner),
958 nr_slabs,
959 sizeof(struct page_alloc_slab));
960 if (!a->slabs)
961 return -ENOMEM;
962 a->nr_slabs = nr_slabs;
963
964 for (i = 0; i < nr_slabs; i++) {
965 struct page_alloc_slab *slab = &a->slabs[i];
966
967 slab->slab_size = SZ_4K * (1 << i);
968 nvgpu_init_list_node(&slab->empty);
969 nvgpu_init_list_node(&slab->partial);
970 nvgpu_init_list_node(&slab->full);
971 slab->nr_empty = 0;
972 slab->nr_partial = 0;
973 slab->nr_full = 0;
974 }
975
976 return 0;
977}
978
979int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
980 const char *name, u64 base, u64 length,
981 u64 blk_size, u64 flags)
982{
983 struct nvgpu_page_allocator *a;
984 char buddy_name[sizeof(__a->name)];
985 int err;
986
987 if (blk_size < SZ_4K)
988 return -EINVAL;
989
990 a = nvgpu_kzalloc(g, sizeof(struct nvgpu_page_allocator));
991 if (!a)
992 return -ENOMEM;
993
994 err = __nvgpu_alloc_common_init(__a, g, name, a, false, &page_ops);
995 if (err)
996 goto fail;
997
998 a->alloc_cache = nvgpu_kmem_cache_create(g,
999 sizeof(struct nvgpu_page_alloc));
1000 a->slab_page_cache = nvgpu_kmem_cache_create(g,
1001 sizeof(struct page_alloc_slab_page));
1002 if (!a->alloc_cache || !a->slab_page_cache) {
1003 err = -ENOMEM;
1004 goto fail;
1005 }
1006
1007 a->base = base;
1008 a->length = length;
1009 a->page_size = blk_size;
1010 a->page_shift = __ffs(blk_size);
1011 a->allocs = NULL;
1012 a->owner = __a;
1013 a->flags = flags;
1014
1015 if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) {
1016 err = nvgpu_page_alloc_init_slabs(a);
1017 if (err)
1018 goto fail;
1019 }
1020
1021 snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);
1022
1023 err = nvgpu_buddy_allocator_init(g, &a->source_allocator, buddy_name,
1024 base, length, blk_size, 0);
1025 if (err)
1026 goto fail;
1027
1028#ifdef CONFIG_DEBUG_FS
1029 nvgpu_init_alloc_debug(g, __a);
1030#endif
1031 palloc_dbg(a, "New allocator: type page\n");
1032 palloc_dbg(a, " base 0x%llx\n", a->base);
1033 palloc_dbg(a, " size 0x%llx\n", a->length);
1034 palloc_dbg(a, " page_size 0x%llx\n", a->page_size);
1035 palloc_dbg(a, " flags 0x%llx\n", a->flags);
1036 palloc_dbg(a, " slabs: %d\n", a->nr_slabs);
1037
1038 return 0;
1039
1040fail:
1041 if (a->alloc_cache)
1042 nvgpu_kmem_cache_destroy(a->alloc_cache);
1043 if (a->slab_page_cache)
1044 nvgpu_kmem_cache_destroy(a->slab_page_cache);
1045 nvgpu_kfree(g, a);
1046 return err;
1047}
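/*
 * A hypothetical initialization call (the name and the base/size parameters
 * below are placeholders, not values from the driver): a 64k-page allocator
 * with the 4k slab path enabled would be created roughly like this.
 */
static int example_create_page_allocator(struct gk20a *g,
					 struct nvgpu_allocator *na,
					 u64 base, u64 size)
{
	return nvgpu_page_allocator_init(g, na, "vidmem-example",
					 base, size, SZ_64K,
					 GPU_ALLOC_4K_VIDMEM_PAGES);
}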
diff --git a/drivers/gpu/nvgpu/common/mm/pd_cache.c b/drivers/gpu/nvgpu/common/mm/pd_cache.c
new file mode 100644
index 00000000..4c3e06ba
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/pd_cache.c
@@ -0,0 +1,444 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/log.h>
24#include <nvgpu/dma.h>
25#include <nvgpu/gmmu.h>
26#include <nvgpu/nvgpu_mem.h>
27#include <nvgpu/list.h>
28#include <nvgpu/log2.h>
29
30#include "gk20a/gk20a.h"
31#include "gk20a/mm_gk20a.h"
32
33#define pd_dbg(g, fmt, args...) nvgpu_log(g, gpu_dbg_pd_cache, fmt, ##args)
34
35/**
36 * DOC: PD cache
37 *
38 * To save memory, given the many sub-page sized PD levels in Pascal and
39 * beyond, a way of packing PD tables together is necessary. This code here
40 * does just that. If a PD table only requires 1024 bytes, then it is possible
41 * to have 4 of these PDs in one page. This is even more pronounced for 256 byte
42 * PD tables.
43 *
44 * The pd cache is basically just a slab allocator. Each instance of the nvgpu
45 * driver makes one of these structs:
46 *
47 * struct nvgpu_pd_cache {
48 * struct nvgpu_list_node full[NVGPU_PD_CACHE_COUNT];
49 * struct nvgpu_list_node partial[NVGPU_PD_CACHE_COUNT];
50 *
51 * struct nvgpu_rbtree_node *mem_tree;
52 * };
53 *
54 * There are two sets of lists, the full and the partial. The full lists contain
55 * pages of memory for which all the memory in that page is in use. The partial
56 * lists contain partially full pages of memory which can be used for more PD
57 * allocations. There are a couple of assumptions here:
58 *
59 * 1. PDs greater than or equal to the page size bypass the pd cache.
60 * 2. PDs are always a power of 2 and at least %NVGPU_PD_CACHE_MIN bytes.
61 *
62 * There are NVGPU_PD_CACHE_COUNT full lists and the same number of partial
63 * lists. For a 4KB page NVGPU_PD_CACHE_COUNT is 4. This is enough space for
64 * 256, 512, 1024, and 2048 byte PDs.
65 *
66 * __nvgpu_pd_alloc() will allocate a PD for the GMMU. It will check if the PD
67 * size is page size or larger and choose the correct allocation scheme - either
68 * from the PD cache or directly. Similarly __nvgpu_pd_free() will free a PD
69 * allocated by __nvgpu_pd_alloc().
70 *
71 * Since the top level PD (the PDB) is a page aligned pointer but less than a
72 * page size the direct functions must be used for allocating PDBs. Otherwise
73 * there would be alignment issues for the PDBs when they get packed.
74 */
75
76static u32 nvgpu_pd_cache_nr(u32 bytes)
77{
78 return ilog2(bytes >> (NVGPU_PD_CACHE_MIN_SHIFT - 1));
79}
80
81static u32 nvgpu_pd_cache_get_mask(struct nvgpu_pd_mem_entry *pentry)
82{
83 u32 mask_offset = 1 << (PAGE_SIZE / pentry->pd_size);
84
85 return mask_offset - 1;
86}
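/*
 * A worked example of the mask above (assuming a 4k kernel PAGE_SIZE): for
 * 256 byte PDs,
 *
 *   PAGE_SIZE / pd_size = 4096 / 256 = 16
 *   mask = (1 << 16) - 1 = 0xffff
 *
 * i.e. one bit of alloc_map per PD slot in the page, so a pentry is "full"
 * once (alloc_map & mask) == mask.
 */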
87
88int nvgpu_pd_cache_init(struct gk20a *g)
89{
90 struct nvgpu_pd_cache *cache;
91 int i;
92
93 /*
94 * This gets called from finalize_poweron() so we need to make sure we
95 * don't reinit the pd_cache over and over.
96 */
97 if (g->mm.pd_cache)
98 return 0;
99
100 cache = nvgpu_kzalloc(g, sizeof(*cache));
101 if (!cache) {
102 nvgpu_err(g, "Failed to alloc pd_cache!");
103 return -ENOMEM;
104 }
105
106 for (i = 0; i < NVGPU_PD_CACHE_COUNT; i++) {
107 nvgpu_init_list_node(&cache->full[i]);
108 nvgpu_init_list_node(&cache->partial[i]);
109 }
110
111 cache->mem_tree = NULL;
112 g->mm.pd_cache = cache;
113 nvgpu_mutex_init(&cache->lock);
114
115 pd_dbg(g, "PD cache initialized!");
116
117 return 0;
118}
119
120void nvgpu_pd_cache_fini(struct gk20a *g)
121{
122 int i;
123 struct nvgpu_pd_cache *cache = g->mm.pd_cache;
124
125 if (!cache)
126 return;
127
128 for (i = 0; i < NVGPU_PD_CACHE_COUNT; i++) {
129 WARN_ON(!nvgpu_list_empty(&cache->full[i]));
130 WARN_ON(!nvgpu_list_empty(&cache->partial[i]));
131 }
132
133 nvgpu_kfree(g, g->mm.pd_cache);
134}
135
136/*
137 * This is the simple pass-through for PDs that are page sized or larger.
138 *
139 * Note: this does not need the cache lock since it does not modify any of the
140 * PD cache data structures.
141 */
142int __nvgpu_pd_cache_alloc_direct(struct gk20a *g,
143 struct nvgpu_gmmu_pd *pd, u32 bytes)
144{
145 int err;
146 unsigned long flags = 0;
147
148 pd_dbg(g, "PD-Alloc [D] %u bytes", bytes);
149
150 pd->mem = nvgpu_kzalloc(g, sizeof(*pd->mem));
151 if (!pd->mem) {
152 nvgpu_err(g, "OOM allocating nvgpu_mem struct!");
153 return -ENOMEM;
154 }
155
156 /*
157 * If bytes == PAGE_SIZE then it's impossible to get a discontiguous DMA
158 * allocation. Some DMA implementations may, despite this fact, still
159 * use the contiguous pool for page sized allocations. As such only
160 * request explicitly contiguous allocs if the page directory is larger
161	 * than the page size. Also, of course, this is all only relevant for
162 * GPUs not using an IOMMU. If there is an IOMMU DMA allocs are always
163 * going to be virtually contiguous and we don't have to force the
164 * underlying allocations to be physically contiguous as well.
165 */
166 if (!nvgpu_iommuable(g) && bytes > PAGE_SIZE)
167 flags = NVGPU_DMA_FORCE_CONTIGUOUS;
168
169 err = nvgpu_dma_alloc_flags(g, flags, bytes, pd->mem);
170 if (err) {
171 nvgpu_err(g, "OOM allocating page directory!");
172 nvgpu_kfree(g, pd->mem);
173 return -ENOMEM;
174 }
175
176 pd->cached = false;
177 pd->mem_offs = 0;
178
179 return 0;
180}
181
182/*
183 * Make a new nvgpu_pd_mem_entry and allocate a PD from it. Update the passed
184 * pd to reflect this allocation.
185 */
186static int nvgpu_pd_cache_alloc_new(struct gk20a *g,
187 struct nvgpu_pd_cache *cache,
188 struct nvgpu_gmmu_pd *pd,
189 u32 bytes)
190{
191 struct nvgpu_pd_mem_entry *pentry;
192
193 pd_dbg(g, "PD-Alloc [C] New: offs=0");
194
195 pentry = nvgpu_kzalloc(g, sizeof(*pentry));
196 if (!pentry) {
197 nvgpu_err(g, "OOM allocating pentry!");
198 return -ENOMEM;
199 }
200
201 if (nvgpu_dma_alloc(g, PAGE_SIZE, &pentry->mem)) {
202 nvgpu_kfree(g, pentry);
203 nvgpu_err(g, "Unable to DMA alloc!");
204 return -ENOMEM;
205 }
206
207 pentry->pd_size = bytes;
208 nvgpu_list_add(&pentry->list_entry,
209 &cache->partial[nvgpu_pd_cache_nr(bytes)]);
210
211 /*
212 * This allocates the very first PD table in the set of tables in this
213 * nvgpu_pd_mem_entry.
214 */
215 pentry->alloc_map = 1;
216
217 /*
218 * Now update the nvgpu_gmmu_pd to reflect this allocation.
219 */
220 pd->mem = &pentry->mem;
221 pd->mem_offs = 0;
222 pd->cached = true;
223
224 pentry->tree_entry.key_start = (u64)(uintptr_t)&pentry->mem;
225 nvgpu_rbtree_insert(&pentry->tree_entry, &cache->mem_tree);
226
227 return 0;
228}
229
230static int nvgpu_pd_cache_alloc_from_partial(struct gk20a *g,
231 struct nvgpu_pd_cache *cache,
232 struct nvgpu_pd_mem_entry *pentry,
233 struct nvgpu_gmmu_pd *pd)
234{
235 unsigned long bit_offs;
236 u32 mem_offs;
237 u32 pentry_mask = nvgpu_pd_cache_get_mask(pentry);
238
239 /*
240 * Find and allocate an open PD.
241 */
242 bit_offs = ffz(pentry->alloc_map);
243 mem_offs = bit_offs * pentry->pd_size;
244
245	/* Bitmap full. Something's wrong. */
246 if (WARN_ON(bit_offs >= ffz(pentry_mask)))
247 return -ENOMEM;
248
249 pentry->alloc_map |= 1 << bit_offs;
250
251 pd_dbg(g, "PD-Alloc [C] Partial: offs=%lu", bit_offs);
252
253 /*
254 * First update the pd.
255 */
256 pd->mem = &pentry->mem;
257 pd->mem_offs = mem_offs;
258 pd->cached = true;
259
260 /*
261 * Now make sure the pentry is in the correct list (full vs partial).
262 */
263 if ((pentry->alloc_map & pentry_mask) == pentry_mask) {
264 pd_dbg(g, "Adding pentry to full list!");
265 nvgpu_list_del(&pentry->list_entry);
266 nvgpu_list_add(&pentry->list_entry,
267 &cache->full[nvgpu_pd_cache_nr(pentry->pd_size)]);
268 }
269
270 return 0;
271}
272
273/*
274 * Get a partially full nvgpu_pd_mem_entry. Returns NULL if there are no
275 * partial nvgpu_pd_mem_entries.
276 */
277static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_get_partial(
278 struct nvgpu_pd_cache *cache, u32 bytes)
279{
280 struct nvgpu_list_node *list =
281 &cache->partial[nvgpu_pd_cache_nr(bytes)];
282
283 if (nvgpu_list_empty(list))
284 return NULL;
285
286 return nvgpu_list_first_entry(list,
287 nvgpu_pd_mem_entry,
288 list_entry);
289}
290
291/*
292 * Allocate memory from an nvgpu_mem for the page directory.
293 */
294static int nvgpu_pd_cache_alloc(struct gk20a *g, struct nvgpu_pd_cache *cache,
295 struct nvgpu_gmmu_pd *pd, u32 bytes)
296{
297 struct nvgpu_pd_mem_entry *pentry;
298 int err;
299
300 pd_dbg(g, "PD-Alloc [C] %u bytes", bytes);
301
302 if (bytes & (bytes - 1) ||
303 (bytes >= PAGE_SIZE ||
304 bytes < NVGPU_PD_CACHE_MIN)) {
305 pd_dbg(g, "PD-Alloc [C] Invalid (bytes=%u)!", bytes);
306 return -EINVAL;
307 }
308
309 pentry = nvgpu_pd_cache_get_partial(cache, bytes);
310 if (!pentry)
311 err = nvgpu_pd_cache_alloc_new(g, cache, pd, bytes);
312 else
313 err = nvgpu_pd_cache_alloc_from_partial(g, cache, pentry, pd);
314
315 if (err)
316 nvgpu_err(g, "PD-Alloc [C] Failed!");
317
318 return err;
319}
320
321/*
322 * Allocate the DMA memory for a page directory. This handles the necessary PD
323 * cache logistics. Since on Parker and later GPUs some of the page directories
324 * are smaller than a page, packing these PDs together saves a lot of memory.
325 */
326int __nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes)
327{
328 struct gk20a *g = gk20a_from_vm(vm);
329 int err;
330
331 /*
331	 * Simple case: PD is page sized or bigger, so just do a regular DMA
333 * alloc.
334 */
335 if (bytes >= PAGE_SIZE) {
336 err = __nvgpu_pd_cache_alloc_direct(g, pd, bytes);
337 if (err)
338 return err;
339
340 return 0;
341 }
342
343 if (WARN_ON(!g->mm.pd_cache))
344 return -ENOMEM;
345
346 nvgpu_mutex_acquire(&g->mm.pd_cache->lock);
347 err = nvgpu_pd_cache_alloc(g, g->mm.pd_cache, pd, bytes);
348 nvgpu_mutex_release(&g->mm.pd_cache->lock);
349
350 return err;
351}
352
353void __nvgpu_pd_cache_free_direct(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
354{
355 pd_dbg(g, "PD-Free [D] 0x%p", pd->mem);
356
357 if (!pd->mem)
358 return;
359
360 nvgpu_dma_free(g, pd->mem);
361 nvgpu_kfree(g, pd->mem);
362 pd->mem = NULL;
363}
364
365static void nvgpu_pd_cache_free_mem_entry(struct gk20a *g,
366 struct nvgpu_pd_cache *cache,
367 struct nvgpu_pd_mem_entry *pentry)
368{
369 nvgpu_dma_free(g, &pentry->mem);
370 nvgpu_list_del(&pentry->list_entry);
371 nvgpu_rbtree_unlink(&pentry->tree_entry, &cache->mem_tree);
372 nvgpu_kfree(g, pentry);
373}
374
375static void nvgpu_pd_cache_do_free(struct gk20a *g,
376 struct nvgpu_pd_cache *cache,
377 struct nvgpu_pd_mem_entry *pentry,
378 struct nvgpu_gmmu_pd *pd)
379{
380 u32 index = pd->mem_offs / pentry->pd_size;
381 u32 bit = 1 << index;
382
383 /* Mark entry as free. */
384 pentry->alloc_map &= ~bit;
385
386 if (pentry->alloc_map & nvgpu_pd_cache_get_mask(pentry)) {
387 /*
388 * Partially full still. If it was already on the partial list
389 * this just re-adds it.
390 */
391 nvgpu_list_del(&pentry->list_entry);
392 nvgpu_list_add(&pentry->list_entry,
393 &cache->partial[nvgpu_pd_cache_nr(pentry->pd_size)]);
394 } else {
395 /* Empty now so free it. */
396 nvgpu_pd_cache_free_mem_entry(g, cache, pentry);
397 }
398}
399
400static struct nvgpu_pd_mem_entry *nvgpu_pd_cache_look_up(
401 struct gk20a *g,
402 struct nvgpu_pd_cache *cache,
403 struct nvgpu_gmmu_pd *pd)
404{
405 struct nvgpu_rbtree_node *node;
406
407 nvgpu_rbtree_search((u64)(uintptr_t)pd->mem, &node,
408 cache->mem_tree);
409 if (!node)
410 return NULL;
411
412 return nvgpu_pd_mem_entry_from_tree_entry(node);
413}
414
415static void nvgpu_pd_cache_free(struct gk20a *g, struct nvgpu_pd_cache *cache,
416 struct nvgpu_gmmu_pd *pd)
417{
418 struct nvgpu_pd_mem_entry *pentry;
419
420 pd_dbg(g, "PD-Free [C] 0x%p", pd->mem);
421
422 pentry = nvgpu_pd_cache_look_up(g, cache, pd);
423 if (!pentry) {
424 WARN(1, "Attempting to free non-existent pd");
425 return;
426 }
427
428 nvgpu_pd_cache_do_free(g, cache, pentry, pd);
429}
430
431void __nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd)
432{
433 struct gk20a *g = gk20a_from_vm(vm);
434
435 /*
436 * Simple case: just DMA free.
437 */
438 if (!pd->cached)
439 return __nvgpu_pd_cache_free_direct(g, pd);
440
441 nvgpu_mutex_acquire(&g->mm.pd_cache->lock);
442 nvgpu_pd_cache_free(g, g->mm.pd_cache, pd);
443 nvgpu_mutex_release(&g->mm.pd_cache->lock);
444}
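/*
 * A minimal usage sketch (not from the driver) for the two entry points above:
 * the GMMU code allocates a sub-page PD through the cache and releases it the
 * same way; whether the cached or the direct path was taken is remembered in
 * pd->cached, so the free side needs no extra bookkeeping from the caller.
 */
static int example_pd_lifetime(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd)
{
	/* 1024 bytes: a power of two smaller than a page, so this is cached. */
	int err = __nvgpu_pd_alloc(vm, pd, 1024);

	if (err)
		return err;

	/* ... program PD entries through pd->mem at offset pd->mem_offs ... */

	__nvgpu_pd_free(vm, pd);
	return 0;
}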
diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c
new file mode 100644
index 00000000..3526fce5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/vidmem.c
@@ -0,0 +1,554 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <linux/scatterlist.h>
24
25#include <nvgpu/timers.h>
26#include <nvgpu/dma.h>
27#include <nvgpu/vidmem.h>
28#include <nvgpu/page_allocator.h>
29#include <nvgpu/enabled.h>
30
31#include "gk20a/gk20a.h"
32#include "gk20a/mm_gk20a.h"
33
34/*
35 * This is expected to be called from the shutdown path (or the error path in
36 * the vidmem init code). As such we do not expect new vidmem frees to be
37 * enqueued.
38 */
39void nvgpu_vidmem_destroy(struct gk20a *g)
40{
41 struct nvgpu_timeout timeout;
42
43 nvgpu_timeout_init(g, &timeout, 100, NVGPU_TIMER_RETRY_TIMER);
44
45 /*
46 * Ensure that the thread runs one last time to flush anything in the
47 * queue.
48 */
49 nvgpu_cond_signal_interruptible(&g->mm.vidmem.clearing_thread_cond);
50
51 /*
52 * Wait for at most 1 second before just continuing on. It doesn't make
53 * sense to hang the system over some potential memory leaks.
54 */
55 do {
56 bool empty;
57
58 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
59 empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
60 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
61
62 if (empty)
63 break;
64
65 nvgpu_msleep(10);
66 } while (!nvgpu_timeout_expired(&timeout));
67
68 /*
69 * Kill the vidmem clearing thread now. This will wake the thread up
70 * automatically and cause the wait_interruptible condition to trigger.
71 */
72 nvgpu_thread_stop(&g->mm.vidmem.clearing_thread);
73
74 if (nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
75 nvgpu_alloc_destroy(&g->mm.vidmem.allocator);
76}
77
78static int __nvgpu_vidmem_do_clear_all(struct gk20a *g)
79{
80 struct mm_gk20a *mm = &g->mm;
81 struct gk20a_fence *gk20a_fence_out = NULL;
82 u64 region2_base = 0;
83 int err = 0;
84
85 if (mm->vidmem.ce_ctx_id == (u32)~0)
86 return -EINVAL;
87
88 vidmem_dbg(g, "Clearing all VIDMEM:");
89
90 err = gk20a_ce_execute_ops(g,
91 mm->vidmem.ce_ctx_id,
92 0,
93 mm->vidmem.base,
94 mm->vidmem.bootstrap_base - mm->vidmem.base,
95 0x00000000,
96 NVGPU_CE_DST_LOCATION_LOCAL_FB,
97 NVGPU_CE_MEMSET,
98 NULL,
99 0,
100 NULL);
101 if (err) {
102 nvgpu_err(g,
103 "Failed to clear vidmem region 1 : %d", err);
104 return err;
105 }
106
107 region2_base = mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size;
108
109 err = gk20a_ce_execute_ops(g,
110 mm->vidmem.ce_ctx_id,
111 0,
112 region2_base,
113 mm->vidmem.size - region2_base,
114 0x00000000,
115 NVGPU_CE_DST_LOCATION_LOCAL_FB,
116 NVGPU_CE_MEMSET,
117 NULL,
118 0,
119 &gk20a_fence_out);
120 if (err) {
121 nvgpu_err(g,
122 "Failed to clear vidmem region 2 : %d", err);
123 return err;
124 }
125
126 if (gk20a_fence_out) {
127 struct nvgpu_timeout timeout;
128
129 nvgpu_timeout_init(g, &timeout,
130 gk20a_get_gr_idle_timeout(g),
131 NVGPU_TIMER_CPU_TIMER);
132
133 do {
134 err = gk20a_fence_wait(g, gk20a_fence_out,
135 gk20a_get_gr_idle_timeout(g));
136 } while (err == -ERESTARTSYS &&
137 !nvgpu_timeout_expired(&timeout));
138
139 gk20a_fence_put(gk20a_fence_out);
140 if (err) {
141 nvgpu_err(g,
142 "fence wait failed for CE execute ops");
143 return err;
144 }
145 }
146
147 mm->vidmem.cleared = true;
148
149 vidmem_dbg(g, "Done!");
150
151 return 0;
152}
153
154void nvgpu_vidmem_thread_pause_sync(struct mm_gk20a *mm)
155{
156 /*
157 * On the first increment of the pause_count (0 -> 1) take the pause
158 * lock and prevent the vidmem clearing thread from processing work
159 * items.
160 *
161 * Otherwise the increment is all that's needed - it's essentially a
162 * ref-count for the number of pause() calls.
163 *
164 * The sync component is implemented by waiting for the lock to be
165 * released by the clearing thread in case the thread is currently
166 * processing work items.
167 */
168 if (nvgpu_atomic_inc_return(&mm->vidmem.pause_count) == 1)
169 nvgpu_mutex_acquire(&mm->vidmem.clearing_thread_lock);
170
171 vidmem_dbg(mm->g, "Clearing thread paused; new count=%d",
172 nvgpu_atomic_read(&mm->vidmem.pause_count));
173}
174
175void nvgpu_vidmem_thread_unpause(struct mm_gk20a *mm)
176{
177 vidmem_dbg(mm->g, "Unpausing clearing thread; current count=%d",
178 nvgpu_atomic_read(&mm->vidmem.pause_count));
179
180 /*
181 * And on the last decrement (1 -> 0) release the pause lock and let
182 * the vidmem clearing thread continue.
183 */
184 if (nvgpu_atomic_dec_return(&mm->vidmem.pause_count) == 0) {
185 nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
186 vidmem_dbg(mm->g, " > Clearing thread really unpaused!");
187 }
188}
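
/*
 * Illustrative usage (sketch only): pause/unpause calls nest like a ref
 * count, so code that needs the clearing thread quiesced brackets its
 * critical section:
 *
 *	nvgpu_vidmem_thread_pause_sync(mm);
 *	... touch vidmem state with the clearing thread held off ...
 *	nvgpu_vidmem_thread_unpause(mm);
 */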
189
190int nvgpu_vidmem_clear_list_enqueue(struct gk20a *g, struct nvgpu_mem *mem)
191{
192 struct mm_gk20a *mm = &g->mm;
193
194 /*
195 * Crap. Can't enqueue new vidmem bufs! CE may be gone!
196 *
197 * However, an errant app can hold a vidmem dma_buf FD open past when
198 * the nvgpu driver has exited. Thus when the FD does get closed
199 * eventually the dma_buf release function will try to call the vidmem
200 * free function which will attempt to enqueue the vidmem into the
201 * vidmem clearing thread.
202 */
203 if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING))
204 return -ENOSYS;
205
206 nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
207 nvgpu_list_add_tail(&mem->clear_list_entry,
208 &mm->vidmem.clear_list_head);
209 nvgpu_atomic64_add(mem->aligned_size, &mm->vidmem.bytes_pending);
210 nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
211
212 nvgpu_cond_signal_interruptible(&mm->vidmem.clearing_thread_cond);
213
214 return 0;
215}
216
217static struct nvgpu_mem *nvgpu_vidmem_clear_list_dequeue(struct mm_gk20a *mm)
218{
219 struct nvgpu_mem *mem = NULL;
220
221 nvgpu_mutex_acquire(&mm->vidmem.clear_list_mutex);
222 if (!nvgpu_list_empty(&mm->vidmem.clear_list_head)) {
223 mem = nvgpu_list_first_entry(&mm->vidmem.clear_list_head,
224 nvgpu_mem, clear_list_entry);
225 nvgpu_list_del(&mem->clear_list_entry);
226 }
227 nvgpu_mutex_release(&mm->vidmem.clear_list_mutex);
228
229 return mem;
230}
231
232static void nvgpu_vidmem_clear_pending_allocs(struct mm_gk20a *mm)
233{
234 struct gk20a *g = mm->g;
235 struct nvgpu_mem *mem;
236
237 vidmem_dbg(g, "Running VIDMEM clearing thread:");
238
239 while ((mem = nvgpu_vidmem_clear_list_dequeue(mm)) != NULL) {
240 nvgpu_vidmem_clear(g, mem);
241
242 WARN_ON(nvgpu_atomic64_sub_return(mem->aligned_size,
243 &g->mm.vidmem.bytes_pending) < 0);
244 mem->size = 0;
245 mem->aperture = APERTURE_INVALID;
246
247 __nvgpu_mem_free_vidmem_alloc(g, mem);
248 nvgpu_kfree(g, mem);
249 }
250
251 vidmem_dbg(g, "Done!");
252}
253
254static int nvgpu_vidmem_clear_pending_allocs_thr(void *mm_ptr)
255{
256 struct mm_gk20a *mm = mm_ptr;
257
258 /*
 259	 * Simple thread whose sole job is to periodically clear userspace
260 * vidmem allocations that have been recently freed.
261 *
 262	 * Since it doesn't make sense to run unless there's pending work, a
 263	 * condition variable is used to wait for it. When the DMA API frees a
264 * userspace vidmem buf it enqueues it into the clear list and alerts us
265 * that we have some work to do.
266 */
267
268 while (!nvgpu_thread_should_stop(&mm->vidmem.clearing_thread)) {
269 int ret;
270
271 /*
272 * Wait for work but also make sure we should not be paused.
273 */
274 ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
275 &mm->vidmem.clearing_thread_cond,
276 nvgpu_thread_should_stop(
277 &mm->vidmem.clearing_thread) ||
278 !nvgpu_list_empty(&mm->vidmem.clear_list_head),
279 0);
280 if (ret == -ERESTARTSYS)
281 continue;
282
283 /*
284 * Use this lock to implement a pause mechanism. By taking this
285 * lock some other code can prevent this thread from processing
286 * work items.
287 */
288 if (!nvgpu_mutex_tryacquire(&mm->vidmem.clearing_thread_lock))
289 continue;
290
291 nvgpu_vidmem_clear_pending_allocs(mm);
292
293 nvgpu_mutex_release(&mm->vidmem.clearing_thread_lock);
294 }
295
296 return 0;
297}
298
299int nvgpu_vidmem_init(struct mm_gk20a *mm)
300{
301 struct gk20a *g = mm->g;
302 size_t size = g->ops.mm.get_vidmem_size ?
303 g->ops.mm.get_vidmem_size(g) : 0;
304 u64 bootstrap_base, bootstrap_size, base;
305 u64 default_page_size = SZ_64K;
306 int err;
307
308 static struct nvgpu_alloc_carveout wpr_co =
309 NVGPU_CARVEOUT("wpr-region", 0, SZ_16M);
310
311 if (!size)
312 return 0;
313
314 vidmem_dbg(g, "init begin");
315
316 wpr_co.base = size - SZ_256M;
317 bootstrap_base = wpr_co.base;
318 bootstrap_size = SZ_16M;
319 base = default_page_size;
320
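	/*
	 * Layout being set up below (sketch): the main "vidmem" allocator
	 * manages [64K, size), while the 16MB bootstrap/WPR region starts
	 * 256MB below the top of vidmem and is reserved out of the main
	 * allocator as a carveout further down.
	 */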
321 /*
322 * Bootstrap allocator for use before the CE is initialized (CE
323 * initialization requires vidmem but we want to use the CE to zero
 324	 * out vidmem before allocating it...).
325 */
 326	err = nvgpu_page_allocator_init(g, &g->mm.vidmem.bootstrap_allocator,
 327					"vidmem-bootstrap",
 328					bootstrap_base, bootstrap_size,
 329					SZ_4K, 0);
	if (err) {
		nvgpu_err(g, "Failed to init vidmem-bootstrap allocator: %d",
				err);
		return err;
	}
330
331 err = nvgpu_page_allocator_init(g, &g->mm.vidmem.allocator,
332 "vidmem",
333 base, size - base,
334 default_page_size,
335 GPU_ALLOC_4K_VIDMEM_PAGES);
336 if (err) {
337 nvgpu_err(g, "Failed to register vidmem for size %zu: %d",
338 size, err);
339 return err;
340 }
341
342 /* Reserve bootstrap region in vidmem allocator */
343 nvgpu_alloc_reserve_carveout(&g->mm.vidmem.allocator, &wpr_co);
344
345 mm->vidmem.base = base;
346 mm->vidmem.size = size - base;
347 mm->vidmem.bootstrap_base = bootstrap_base;
348 mm->vidmem.bootstrap_size = bootstrap_size;
349
350 err = nvgpu_cond_init(&mm->vidmem.clearing_thread_cond);
351 if (err)
352 goto fail;
353
354 nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);
355 nvgpu_init_list_node(&mm->vidmem.clear_list_head);
356 nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
357 nvgpu_mutex_init(&mm->vidmem.clearing_thread_lock);
358 nvgpu_mutex_init(&mm->vidmem.first_clear_mutex);
359 nvgpu_atomic_set(&mm->vidmem.pause_count, 0);
360
361 /*
362 * Start the thread off in the paused state. The thread doesn't have to
 363	 * be running for this to work. It will be unpaused later on in
 364	 * finalize_poweron(). We won't necessarily have a CE context yet
 365	 * either, so without the pause there would be a window where we could
 366	 * try to clear a vidmem buffer before a CE context exists to do so.
367 */
368 nvgpu_vidmem_thread_pause_sync(mm);
369
370 err = nvgpu_thread_create(&mm->vidmem.clearing_thread, mm,
371 nvgpu_vidmem_clear_pending_allocs_thr,
372 "vidmem-clear");
373 if (err)
374 goto fail;
375
376 vidmem_dbg(g, "VIDMEM Total: %zu MB", size >> 20);
377 vidmem_dbg(g, "VIDMEM Ranges:");
378 vidmem_dbg(g, " 0x%-10llx -> 0x%-10llx Primary",
379 mm->vidmem.base, mm->vidmem.base + mm->vidmem.size);
380 vidmem_dbg(g, " 0x%-10llx -> 0x%-10llx Bootstrap",
381 mm->vidmem.bootstrap_base,
382 mm->vidmem.bootstrap_base + mm->vidmem.bootstrap_size);
383 vidmem_dbg(g, "VIDMEM carveouts:");
384 vidmem_dbg(g, " 0x%-10llx -> 0x%-10llx %s",
385 wpr_co.base, wpr_co.base + wpr_co.length, wpr_co.name);
386
387 return 0;
388
389fail:
390 nvgpu_cond_destroy(&mm->vidmem.clearing_thread_cond);
391 nvgpu_vidmem_destroy(g);
392 return err;
393}
394
395int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space)
396{
397 struct nvgpu_allocator *allocator = &g->mm.vidmem.allocator;
398
399 gk20a_dbg_fn("");
400
401 if (!nvgpu_alloc_initialized(allocator))
402 return -ENOSYS;
403
404 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
405 *space = nvgpu_alloc_space(allocator) +
406 nvgpu_atomic64_read(&g->mm.vidmem.bytes_pending);
407 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
408 return 0;
409}
410
411int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
412{
413 struct gk20a_fence *gk20a_fence_out = NULL;
414 struct gk20a_fence *gk20a_last_fence = NULL;
415 struct nvgpu_page_alloc *alloc = NULL;
416 void *sgl = NULL;
417 int err = 0;
418
419 if (g->mm.vidmem.ce_ctx_id == (u32)~0)
420 return -EINVAL;
421
422 alloc = mem->vidmem_alloc;
423
424 vidmem_dbg(g, "Clearing VIDMEM buf:");
425
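	/*
	 * Issue one CE memset per SGL chunk. Only the most recent fence is
	 * kept: the memsets go to the same CE context and are assumed to
	 * complete in order, so waiting on the last fence below covers all
	 * of them.
	 */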
426 nvgpu_sgt_for_each_sgl(sgl, &alloc->sgt) {
427 if (gk20a_last_fence)
428 gk20a_fence_put(gk20a_last_fence);
429
430 err = gk20a_ce_execute_ops(g,
431 g->mm.vidmem.ce_ctx_id,
432 0,
433 nvgpu_sgt_get_phys(&alloc->sgt, sgl),
434 nvgpu_sgt_get_length(&alloc->sgt, sgl),
435 0x00000000,
436 NVGPU_CE_DST_LOCATION_LOCAL_FB,
437 NVGPU_CE_MEMSET,
438 NULL,
439 0,
440 &gk20a_fence_out);
441
442 if (err) {
443 nvgpu_err(g,
444 "Failed gk20a_ce_execute_ops[%d]", err);
445 return err;
446 }
447
448 vidmem_dbg(g, " > [0x%llx +0x%llx]",
449 nvgpu_sgt_get_phys(&alloc->sgt, sgl),
450 nvgpu_sgt_get_length(&alloc->sgt, sgl));
451
452 gk20a_last_fence = gk20a_fence_out;
453 }
454
455 if (gk20a_last_fence) {
456 struct nvgpu_timeout timeout;
457
458 nvgpu_timeout_init(g, &timeout,
459 gk20a_get_gr_idle_timeout(g),
460 NVGPU_TIMER_CPU_TIMER);
461
462 do {
463 err = gk20a_fence_wait(g, gk20a_last_fence,
464 gk20a_get_gr_idle_timeout(g));
465 } while (err == -ERESTARTSYS &&
466 !nvgpu_timeout_expired(&timeout));
467
468 gk20a_fence_put(gk20a_last_fence);
469 if (err)
470 nvgpu_err(g,
471 "fence wait failed for CE execute ops");
472 }
473
474 vidmem_dbg(g, " Done");
475
476 return err;
477}
478
479static int nvgpu_vidmem_clear_all(struct gk20a *g)
480{
481 int err;
482
483 if (g->mm.vidmem.cleared)
484 return 0;
485
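	/*
	 * Double-checked locking: the unlocked test above keeps the common
	 * (already cleared) case cheap, while re-checking under
	 * first_clear_mutex ensures only one caller ever performs the full
	 * clear.
	 */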
486 nvgpu_mutex_acquire(&g->mm.vidmem.first_clear_mutex);
487 if (!g->mm.vidmem.cleared) {
488 err = __nvgpu_vidmem_do_clear_all(g);
489 if (err) {
490 nvgpu_mutex_release(&g->mm.vidmem.first_clear_mutex);
491 nvgpu_err(g, "failed to clear whole vidmem");
492 return err;
493 }
494 }
495 nvgpu_mutex_release(&g->mm.vidmem.first_clear_mutex);
496
497 return 0;
498}
499
500struct nvgpu_vidmem_buf *nvgpu_vidmem_user_alloc(struct gk20a *g, size_t bytes)
501{
502 struct nvgpu_vidmem_buf *buf;
503 int err;
504
505 err = nvgpu_vidmem_clear_all(g);
506 if (err)
507 return NULL;
508
509 buf = nvgpu_kzalloc(g, sizeof(*buf));
510 if (!buf)
511 return NULL;
512
513 buf->g = g;
514 buf->mem = nvgpu_kzalloc(g, sizeof(*buf->mem));
515 if (!buf->mem)
516 goto fail;
517
518 err = nvgpu_dma_alloc_vid(g, bytes, buf->mem);
519 if (err)
520 goto fail;
521
522 /*
523 * Alerts the DMA API that when we free this vidmem buf we have to
524 * clear it to avoid leaking data to userspace.
525 */
526 buf->mem->mem_flags |= NVGPU_MEM_FLAG_USER_MEM;
527
528 return buf;
529
530fail:
531 /* buf will never be NULL here. */
532 nvgpu_kfree(g, buf->mem);
533 nvgpu_kfree(g, buf);
534 return NULL;
535}
536
537void nvgpu_vidmem_buf_free(struct gk20a *g, struct nvgpu_vidmem_buf *buf)
538{
539 /*
540 * In some error paths it's convenient to be able to "free" a NULL buf.
541 */
542 if (!buf)
543 return;
544
545 nvgpu_dma_free(g, buf->mem);
546
547 /*
548 * We don't free buf->mem here. This is handled by nvgpu_dma_free()!
549 * Since these buffers are cleared in the background the nvgpu_mem
550 * struct must live on through that. We transfer ownership here to the
551 * DMA API and let the DMA API free the buffer.
552 */
553 nvgpu_kfree(g, buf);
554}
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
new file mode 100644
index 00000000..ebe8e381
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -0,0 +1,1145 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/bug.h>
24#include <uapi/linux/nvgpu.h>
25
26#include <nvgpu/log.h>
27#include <nvgpu/dma.h>
28#include <nvgpu/vm.h>
29#include <nvgpu/vm_area.h>
30#include <nvgpu/gmmu.h>
31#include <nvgpu/lock.h>
32#include <nvgpu/list.h>
33#include <nvgpu/rbtree.h>
34#include <nvgpu/semaphore.h>
35#include <nvgpu/enabled.h>
36
37#include <nvgpu/vgpu/vm.h>
38
39#include "gk20a/gk20a.h"
40#include "gk20a/mm_gk20a.h"
41
42struct nvgpu_ctag_buffer_info {
43 u64 size;
44 enum gmmu_pgsz_gk20a pgsz_idx;
45 u32 flags;
46
47 s16 compr_kind;
48 s16 incompr_kind;
49
50 u32 ctag_lines;
51};
52
53static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
54 struct nvgpu_ctag_buffer_info *binfo);
55
56static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
57 struct vm_gk20a_mapping_batch *batch);
58
59int vm_aspace_id(struct vm_gk20a *vm)
60{
61 return vm->as_share ? vm->as_share->id : -1;
62}
63
64static void __nvgpu_vm_free_entries(struct vm_gk20a *vm,
65 struct nvgpu_gmmu_pd *pd,
66 int level)
67{
68 int i;
69
70 if (pd->mem) {
71 __nvgpu_pd_free(vm, pd);
72 pd->mem = NULL;
73 }
74
75 if (pd->entries) {
76 for (i = 0; i < pd->num_entries; i++)
77 __nvgpu_vm_free_entries(vm, &pd->entries[i],
78 level + 1);
79 nvgpu_vfree(vm->mm->g, pd->entries);
80 pd->entries = NULL;
81 }
82}
83
84static void nvgpu_vm_free_entries(struct vm_gk20a *vm,
85 struct nvgpu_gmmu_pd *pdb)
86{
87 struct gk20a *g = vm->mm->g;
88 int i;
89
90 __nvgpu_pd_cache_free_direct(g, pdb);
91
92 if (!pdb->entries)
93 return;
94
95 for (i = 0; i < pdb->num_entries; i++)
96 __nvgpu_vm_free_entries(vm, &pdb->entries[i], 1);
97
98 nvgpu_vfree(g, pdb->entries);
99 pdb->entries = NULL;
100}
101
102u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
103 enum gmmu_pgsz_gk20a pgsz_idx)
104
105{
106 struct gk20a *g = vm->mm->g;
107 struct nvgpu_allocator *vma = NULL;
108 u64 addr;
109 u64 page_size = vm->gmmu_page_sizes[pgsz_idx];
110
 111	if (pgsz_idx >= gmmu_nr_page_sizes) {
 112		nvgpu_err(g, "invalid page size %d requested", pgsz_idx);
 113		return 0;
 114	}
 115
 116	vma = vm->vma[pgsz_idx];
 117
 118	if ((pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
 119		nvgpu_err(g, "(%s) unsupported page size requested", vma->name);
 120		return 0;
 121	}
122
123 /* Be certain we round up to page_size if needed */
124 size = (size + ((u64)page_size - 1)) & ~((u64)page_size - 1);
125
126 addr = nvgpu_alloc(vma, size);
127 if (!addr) {
128 nvgpu_err(g, "(%s) oom: sz=0x%llx", vma->name, size);
129 return 0;
130 }
131
132 return addr;
133}
134
135int __nvgpu_vm_free_va(struct vm_gk20a *vm, u64 addr,
136 enum gmmu_pgsz_gk20a pgsz_idx)
137{
138 struct nvgpu_allocator *vma = vm->vma[pgsz_idx];
139
140 nvgpu_free(vma, addr);
141
142 return 0;
143}
144
145void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch)
146{
147 memset(mapping_batch, 0, sizeof(*mapping_batch));
148 mapping_batch->gpu_l2_flushed = false;
149 mapping_batch->need_tlb_invalidate = false;
150}
151
152void nvgpu_vm_mapping_batch_finish_locked(
153 struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch)
154{
155 /* hanging kref_put batch pointer? */
156 WARN_ON(vm->kref_put_batch == mapping_batch);
157
158 if (mapping_batch->need_tlb_invalidate) {
159 struct gk20a *g = gk20a_from_vm(vm);
160 g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
161 }
162}
163
164void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm,
165 struct vm_gk20a_mapping_batch *mapping_batch)
166{
167 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
168 nvgpu_vm_mapping_batch_finish_locked(vm, mapping_batch);
169 nvgpu_mutex_release(&vm->update_gmmu_lock);
170}
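
/*
 * Illustrative batching pattern (sketch only): callers unmapping several
 * buffers in a row can defer the TLB invalidate to a single flush:
 *
 *	struct vm_gk20a_mapping_batch batch;
 *
 *	nvgpu_vm_mapping_batch_start(&batch);
 *	nvgpu_vm_unmap(vm, addr_a, &batch);
 *	nvgpu_vm_unmap(vm, addr_b, &batch);
 *	nvgpu_vm_mapping_batch_finish(vm, &batch);
 */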
171
172/*
173 * Determine if the passed address space can support big pages or not.
174 */
175int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
176{
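	/*
	 * The mask below is big_page_size * 1024, which (on gk20a-style
	 * GMMUs) corresponds to one PDE's worth of big pages: both base and
	 * size must be aligned to that granularity for big pages to be
	 * usable.
	 */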
177 u64 mask = ((u64)vm->big_page_size << 10) - 1;
178
179 if (base & mask || size & mask)
180 return 0;
181 return 1;
182}
183
184/*
185 * Initialize a semaphore pool. Just return successfully if we do not need
186 * semaphores (i.e when sync-pts are active).
187 */
188static int nvgpu_init_sema_pool(struct vm_gk20a *vm)
189{
190 struct nvgpu_semaphore_sea *sema_sea;
191 struct mm_gk20a *mm = vm->mm;
192 struct gk20a *g = mm->g;
193 int err;
194
195 /*
196 * Don't waste the memory on semaphores if we don't need them.
197 */
198 if (nvgpu_is_enabled(g, NVGPU_HAS_SYNCPOINTS))
199 return 0;
200
201 if (vm->sema_pool)
202 return 0;
203
204 sema_sea = nvgpu_semaphore_sea_create(g);
205 if (!sema_sea)
206 return -ENOMEM;
207
208 vm->sema_pool = nvgpu_semaphore_pool_alloc(sema_sea);
209 if (!vm->sema_pool)
210 return -ENOMEM;
211
212 /*
213 * Allocate a chunk of GPU VA space for mapping the semaphores. We will
214 * do a fixed alloc in the kernel VM so that all channels have the same
215 * RO address range for the semaphores.
216 *
217 * !!! TODO: cleanup.
218 */
219 sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
220 vm->va_limit -
221 mm->channel.kernel_size,
222 512 * PAGE_SIZE,
223 SZ_4K);
224 if (!sema_sea->gpu_va) {
225 nvgpu_free(&vm->kernel, sema_sea->gpu_va);
226 nvgpu_vm_put(vm);
227 return -ENOMEM;
228 }
229
230 err = nvgpu_semaphore_pool_map(vm->sema_pool, vm);
231 if (err) {
232 nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
233 nvgpu_free(vm->vma[gmmu_page_size_small],
234 vm->sema_pool->gpu_va);
235 return err;
236 }
237
238 return 0;
239}
240
241static int __nvgpu_vm_init(struct mm_gk20a *mm,
242 struct vm_gk20a *vm,
243 u32 big_page_size,
244 u64 low_hole,
245 u64 kernel_reserved,
246 u64 aperture_size,
247 bool big_pages,
248 bool userspace_managed,
249 char *name)
250{
251 int err;
252 char alloc_name[32];
253 u64 kernel_vma_flags;
254 u64 user_vma_start, user_vma_limit;
255 u64 user_lp_vma_start, user_lp_vma_limit;
256 u64 kernel_vma_start, kernel_vma_limit;
257 struct gk20a *g = gk20a_from_mm(mm);
258
259 if (WARN_ON(kernel_reserved + low_hole > aperture_size))
260 return -ENOMEM;
261
262 nvgpu_log_info(g, "Init space for %s: valimit=0x%llx, "
263 "LP size=0x%x lowhole=0x%llx",
264 name, aperture_size,
265 (unsigned int)big_page_size, low_hole);
266
267 vm->mm = mm;
268
269 vm->gmmu_page_sizes[gmmu_page_size_small] = SZ_4K;
270 vm->gmmu_page_sizes[gmmu_page_size_big] = big_page_size;
271 vm->gmmu_page_sizes[gmmu_page_size_kernel] = SZ_4K;
272
273 /* Set up vma pointers. */
274 vm->vma[gmmu_page_size_small] = &vm->user;
275 vm->vma[gmmu_page_size_big] = &vm->user;
276 vm->vma[gmmu_page_size_kernel] = &vm->kernel;
277 if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
278 vm->vma[gmmu_page_size_big] = &vm->user_lp;
279
280 vm->va_start = low_hole;
281 vm->va_limit = aperture_size;
282
283 vm->big_page_size = vm->gmmu_page_sizes[gmmu_page_size_big];
284 vm->userspace_managed = userspace_managed;
285 vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size);
286
287#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
288 if (g->is_virtual && userspace_managed) {
289 nvgpu_err(g, "vGPU: no userspace managed addr space support");
290 return -ENOSYS;
291 }
292 if (g->is_virtual && vgpu_vm_init(g, vm)) {
293 nvgpu_err(g, "Failed to init vGPU VM!");
294 return -ENOMEM;
295 }
296#endif
297
298 /* Initialize the page table data structures. */
 299	strncpy(vm->name, name, sizeof(vm->name) - 1);
	vm->name[sizeof(vm->name) - 1] = '\0';
300 err = nvgpu_gmmu_init_page_table(vm);
301 if (err)
302 goto clean_up_vgpu_vm;
303
304 /* Setup vma limits. */
305 if (kernel_reserved + low_hole < aperture_size) {
306 /*
307 * If big_pages are disabled for this VM then it only makes
308 * sense to make one VM, same as if the unified address flag
309 * is set.
310 */
311 if (!big_pages ||
312 nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES)) {
313 user_vma_start = low_hole;
314 user_vma_limit = vm->va_limit - kernel_reserved;
315 user_lp_vma_start = user_vma_limit;
316 user_lp_vma_limit = user_vma_limit;
317 } else {
318 user_vma_start = low_hole;
319 user_vma_limit = __nv_gmmu_va_small_page_limit();
320 user_lp_vma_start = __nv_gmmu_va_small_page_limit();
321 user_lp_vma_limit = vm->va_limit - kernel_reserved;
322 }
323 } else {
324 user_vma_start = 0;
325 user_vma_limit = 0;
326 user_lp_vma_start = 0;
327 user_lp_vma_limit = 0;
328 }
329 kernel_vma_start = vm->va_limit - kernel_reserved;
330 kernel_vma_limit = vm->va_limit;
331
332 nvgpu_log_info(g, "user_vma [0x%llx,0x%llx)",
333 user_vma_start, user_vma_limit);
334 nvgpu_log_info(g, "user_lp_vma [0x%llx,0x%llx)",
335 user_lp_vma_start, user_lp_vma_limit);
336 nvgpu_log_info(g, "kernel_vma [0x%llx,0x%llx)",
337 kernel_vma_start, kernel_vma_limit);
338
339 if (WARN_ON(user_vma_start > user_vma_limit) ||
340 WARN_ON(user_lp_vma_start > user_lp_vma_limit) ||
341 WARN_ON(kernel_vma_start >= kernel_vma_limit)) {
342 err = -EINVAL;
343 goto clean_up_page_tables;
344 }
345
346 kernel_vma_flags = (kernel_reserved + low_hole) == aperture_size ?
347 0 : GPU_ALLOC_GVA_SPACE;
348
349 /*
350 * A "user" area only makes sense for the GVA spaces. For VMs where
 351	 * there is no "user" area, user_vma_start will be equal to
 352	 * user_vma_limit (i.e. a zero-sized space). In such a situation the kernel
353 * area must be non-zero in length.
354 */
355 if (user_vma_start >= user_vma_limit &&
356 kernel_vma_start >= kernel_vma_limit) {
357 err = -EINVAL;
358 goto clean_up_page_tables;
359 }
360
361 /*
362 * Determine if big pages are possible in this VM. If a split address
363 * space is used then check the user_lp vma instead of the user vma.
364 */
365 if (nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
366 vm->big_pages = big_pages &&
367 nvgpu_big_pages_possible(vm, user_vma_start,
368 user_vma_limit - user_vma_start);
369 else
370 vm->big_pages = big_pages &&
371 nvgpu_big_pages_possible(vm, user_lp_vma_start,
372 user_lp_vma_limit - user_lp_vma_start);
373
374 /*
375 * User VMA.
376 */
377 if (user_vma_start < user_vma_limit) {
378 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
379 err = __nvgpu_buddy_allocator_init(g, &vm->user,
380 vm, alloc_name,
381 user_vma_start,
382 user_vma_limit -
383 user_vma_start,
384 SZ_4K,
385 GPU_BALLOC_MAX_ORDER,
386 GPU_ALLOC_GVA_SPACE);
387 if (err)
388 goto clean_up_page_tables;
389 } else {
390 /*
391 * Make these allocator pointers point to the kernel allocator
392 * since we still use the legacy notion of page size to choose
393 * the allocator.
394 */
395 vm->vma[0] = &vm->kernel;
396 vm->vma[1] = &vm->kernel;
397 }
398
399 /*
400 * User VMA for large pages when a split address range is used.
401 */
402 if (user_lp_vma_start < user_lp_vma_limit) {
403 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s_lp", name);
404 err = __nvgpu_buddy_allocator_init(g, &vm->user_lp,
405 vm, alloc_name,
406 user_lp_vma_start,
407 user_lp_vma_limit -
408 user_lp_vma_start,
409 vm->big_page_size,
410 GPU_BALLOC_MAX_ORDER,
411 GPU_ALLOC_GVA_SPACE);
412 if (err)
413 goto clean_up_allocators;
414 }
415
416 /*
417 * Kernel VMA. Must always exist for an address space.
418 */
419 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
420 err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
421 vm, alloc_name,
422 kernel_vma_start,
423 kernel_vma_limit - kernel_vma_start,
424 SZ_4K,
425 GPU_BALLOC_MAX_ORDER,
426 kernel_vma_flags);
427 if (err)
428 goto clean_up_allocators;
429
430 vm->mapped_buffers = NULL;
431
432 nvgpu_mutex_init(&vm->update_gmmu_lock);
433 nvgpu_ref_init(&vm->ref);
434 nvgpu_init_list_node(&vm->vm_area_list);
435
436 /*
437 * This is only necessary for channel address spaces. The best way to
438 * distinguish channel address spaces from other address spaces is by
439 * size - if the address space is 4GB or less, it's not a channel.
440 */
441 if (vm->va_limit > SZ_4G) {
442 err = nvgpu_init_sema_pool(vm);
443 if (err)
444 goto clean_up_allocators;
445 }
446
447 return 0;
448
449clean_up_allocators:
450 if (nvgpu_alloc_initialized(&vm->kernel))
451 nvgpu_alloc_destroy(&vm->kernel);
452 if (nvgpu_alloc_initialized(&vm->user))
453 nvgpu_alloc_destroy(&vm->user);
454 if (nvgpu_alloc_initialized(&vm->user_lp))
455 nvgpu_alloc_destroy(&vm->user_lp);
456clean_up_page_tables:
457 /* Cleans up nvgpu_gmmu_init_page_table() */
458 __nvgpu_pd_cache_free_direct(g, &vm->pdb);
459clean_up_vgpu_vm:
460#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
461 if (g->is_virtual)
462 vgpu_vm_remove(vm);
463#endif
464 return err;
465}
466
467/**
468 * nvgpu_vm_init() - Initialize an address space.
469 *
470 * @g - The GPU.
471 * @big_page_size - Size of big pages associated with this VM.
472 * @low_hole - The size of the low hole (unaddressable memory at the bottom of
473 * the address space).
474 * @kernel_reserved - Space reserved for kernel only allocations.
475 * @aperture_size - Total size of the aperture.
476 * @big_pages - If true then big pages are possible in the VM. Note this does
477 * not guarantee that big pages will be possible.
478 * @userspace_managed - If true userspace manages the VA space (fixed-offset
479 * mappings only). @name - Name of the address space.
480 *
481 * This function initializes an address space according to the following map:
482 *
483 * +--+ 0x0
484 * | |
485 * +--+ @low_hole
486 * | |
487 * ~ ~ This is the "user" section.
488 * | |
489 * +--+ @aperture_size - @kernel_reserved
490 * | |
491 * ~ ~ This is the "kernel" section.
492 * | |
493 * +--+ @aperture_size
494 *
495 * The user section is therefore whatever is left over after the @low_hole and
496 * @kernel_reserved memory have been portioned out. The @kernel_reserved is
497 * always present at the top of the memory space and the @low_hole is always at
498 * the bottom.
499 *
500 * For certain address spaces a "user" section makes no sense (bar1, etc) so in
501 * such cases the @kernel_reserved and @low_hole should sum to exactly
502 * @aperture_size.
503 */
504struct vm_gk20a *nvgpu_vm_init(struct gk20a *g,
505 u32 big_page_size,
506 u64 low_hole,
507 u64 kernel_reserved,
508 u64 aperture_size,
509 bool big_pages,
510 bool userspace_managed,
511 char *name)
512{
513 struct vm_gk20a *vm = nvgpu_kzalloc(g, sizeof(*vm));
514
515 if (!vm)
516 return NULL;
517
518 if (__nvgpu_vm_init(&g->mm, vm, big_page_size, low_hole,
519 kernel_reserved, aperture_size, big_pages,
520 userspace_managed, name)) {
521 nvgpu_kfree(g, vm);
522 return NULL;
523 }
524
525 return vm;
526}
527
528/*
529 * Cleanup the VM!
530 */
531static void __nvgpu_vm_remove(struct vm_gk20a *vm)
532{
533 struct nvgpu_mapped_buf *mapped_buffer;
534 struct nvgpu_vm_area *vm_area, *vm_area_tmp;
535 struct nvgpu_rbtree_node *node = NULL;
536 struct gk20a *g = vm->mm->g;
537
538 /*
539 * Do this outside of the update_gmmu_lock since unmapping the semaphore
 540	 * pool involves unmapping a GMMU mapping which means acquiring the
541 * update_gmmu_lock.
542 */
543 if (!nvgpu_is_enabled(g, NVGPU_HAS_SYNCPOINTS)) {
544 if (vm->sema_pool) {
545 nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
546 nvgpu_semaphore_pool_put(vm->sema_pool);
547 }
548 }
549
550#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU)
551 if (nvgpu_mem_is_valid(&g->syncpt_mem) && vm->syncpt_ro_map_gpu_va)
552 nvgpu_gmmu_unmap(vm, &g->syncpt_mem,
553 vm->syncpt_ro_map_gpu_va);
554#endif
555
556 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
557
558 nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
559 while (node) {
560 mapped_buffer = mapped_buffer_from_rbtree_node(node);
561 __nvgpu_vm_unmap(mapped_buffer, NULL);
562 nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
563 }
564
565 /* destroy remaining reserved memory areas */
566 nvgpu_list_for_each_entry_safe(vm_area, vm_area_tmp,
567 &vm->vm_area_list,
568 nvgpu_vm_area, vm_area_list) {
569 nvgpu_list_del(&vm_area->vm_area_list);
570 nvgpu_kfree(vm->mm->g, vm_area);
571 }
572
573 if (nvgpu_alloc_initialized(&vm->kernel))
574 nvgpu_alloc_destroy(&vm->kernel);
575 if (nvgpu_alloc_initialized(&vm->user))
576 nvgpu_alloc_destroy(&vm->user);
577 if (nvgpu_alloc_initialized(&vm->user_lp))
578 nvgpu_alloc_destroy(&vm->user_lp);
579
580 nvgpu_vm_free_entries(vm, &vm->pdb);
581
582#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
583 if (g->is_virtual)
584 vgpu_vm_remove(vm);
585#endif
586
587 nvgpu_mutex_release(&vm->update_gmmu_lock);
588
589 nvgpu_kfree(g, vm);
590}
591
592static void __nvgpu_vm_remove_ref(struct nvgpu_ref *ref)
593{
594 struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
595
596 __nvgpu_vm_remove(vm);
597}
598
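/*
 * VM lifetime: nvgpu_vm_init() hands back a VM holding one reference;
 * nvgpu_vm_get()/nvgpu_vm_put() adjust that count and the VM is torn down
 * (via __nvgpu_vm_remove()) when the last reference is dropped.
 */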
599void nvgpu_vm_get(struct vm_gk20a *vm)
600{
601 nvgpu_ref_get(&vm->ref);
602}
603
604void nvgpu_vm_put(struct vm_gk20a *vm)
605{
606 nvgpu_ref_put(&vm->ref, __nvgpu_vm_remove_ref);
607}
608
609int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
610 struct nvgpu_mapped_buf *mapped_buffer)
611{
612 mapped_buffer->node.key_start = mapped_buffer->addr;
613 mapped_buffer->node.key_end = mapped_buffer->addr + mapped_buffer->size;
614
615 nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers);
616
617 return 0;
618}
619
620void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,
621 struct nvgpu_mapped_buf *mapped_buffer)
622{
623 nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers);
624}
625
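/*
 * Mapped buffers are tracked in an rbtree keyed on [addr, addr + size). The
 * helpers below look a mapping up by its start address, by an address within
 * its range, or find the nearest mapping below a given address.
 */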
626struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf(
627 struct vm_gk20a *vm, u64 addr)
628{
629 struct nvgpu_rbtree_node *node = NULL;
630 struct nvgpu_rbtree_node *root = vm->mapped_buffers;
631
632 nvgpu_rbtree_search(addr, &node, root);
633 if (!node)
634 return NULL;
635
636 return mapped_buffer_from_rbtree_node(node);
637}
638
639struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_range(
640 struct vm_gk20a *vm, u64 addr)
641{
642 struct nvgpu_rbtree_node *node = NULL;
643 struct nvgpu_rbtree_node *root = vm->mapped_buffers;
644
645 nvgpu_rbtree_range_search(addr, &node, root);
646 if (!node)
647 return NULL;
648
649 return mapped_buffer_from_rbtree_node(node);
650}
651
652struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than(
653 struct vm_gk20a *vm, u64 addr)
654{
655 struct nvgpu_rbtree_node *node = NULL;
656 struct nvgpu_rbtree_node *root = vm->mapped_buffers;
657
658 nvgpu_rbtree_less_than_search(addr, &node, root);
659 if (!node)
660 return NULL;
661
662 return mapped_buffer_from_rbtree_node(node);
663}
664
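/*
 * Snapshot pattern (sketch): nvgpu_vm_get_buffers() takes a reference on
 * every mapped buffer and returns them as a list; the caller hands that list
 * back to nvgpu_vm_put_buffers(), which drops the references (batching any
 * resulting unmaps) and frees the list.
 */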
665int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
666 struct nvgpu_mapped_buf ***mapped_buffers,
667 int *num_buffers)
668{
669 struct nvgpu_mapped_buf *mapped_buffer;
670 struct nvgpu_mapped_buf **buffer_list;
671 struct nvgpu_rbtree_node *node = NULL;
672 int i = 0;
673
674 if (vm->userspace_managed) {
675 *mapped_buffers = NULL;
676 *num_buffers = 0;
677 return 0;
678 }
679
680 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
681
682 buffer_list = nvgpu_big_zalloc(vm->mm->g, sizeof(*buffer_list) *
683 vm->num_user_mapped_buffers);
684 if (!buffer_list) {
685 nvgpu_mutex_release(&vm->update_gmmu_lock);
686 return -ENOMEM;
687 }
688
689 nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
690 while (node) {
691 mapped_buffer = mapped_buffer_from_rbtree_node(node);
692 buffer_list[i] = mapped_buffer;
693 nvgpu_ref_get(&mapped_buffer->ref);
694 i++;
695 nvgpu_rbtree_enum_next(&node, node);
696 }
697
698 BUG_ON(i != vm->num_user_mapped_buffers);
699
700 *num_buffers = vm->num_user_mapped_buffers;
701 *mapped_buffers = buffer_list;
702
703 nvgpu_mutex_release(&vm->update_gmmu_lock);
704
705 return 0;
706}
707
708void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
709 struct nvgpu_mapped_buf **mapped_buffers,
710 int num_buffers)
711{
712 int i;
713 struct vm_gk20a_mapping_batch batch;
714
715 if (num_buffers == 0)
716 return;
717
718 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
719 nvgpu_vm_mapping_batch_start(&batch);
720 vm->kref_put_batch = &batch;
721
722 for (i = 0; i < num_buffers; ++i)
723 nvgpu_ref_put(&mapped_buffers[i]->ref, __nvgpu_vm_unmap_ref);
724
725 vm->kref_put_batch = NULL;
726 nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
727 nvgpu_mutex_release(&vm->update_gmmu_lock);
728
729 nvgpu_big_free(vm->mm->g, mapped_buffers);
730}
731
732struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
733 struct nvgpu_os_buffer *os_buf,
734 struct nvgpu_sgt *sgt,
735 u64 map_addr,
736 u64 map_size,
737 u64 phys_offset,
738 int rw,
739 u32 flags,
740 s16 compr_kind,
741 s16 incompr_kind,
742 struct vm_gk20a_mapping_batch *batch,
743 enum nvgpu_aperture aperture)
744{
745 struct gk20a *g = gk20a_from_vm(vm);
746 struct nvgpu_mapped_buf *mapped_buffer = NULL;
747 struct nvgpu_ctag_buffer_info binfo = { 0 };
748 struct nvgpu_vm_area *vm_area = NULL;
749 int err = 0;
750 u64 align;
751 u32 ctag_offset = 0;
752 bool clear_ctags = false;
753 bool va_allocated = true;
754
755 /*
756 * The kind used as part of the key for map caching. HW may
757 * actually be programmed with the fallback kind in case the
758 * key kind is compressible but we're out of comptags.
759 */
760 s16 map_key_kind;
761
762 /*
763 * The actual GMMU PTE kind
764 */
765 u8 pte_kind;
766
767 if (vm->userspace_managed &&
768 !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
769 nvgpu_err(g,
770 "non-fixed-offset mapping not available on "
771 "userspace managed address spaces");
772 return ERR_PTR(-EINVAL);
773 }
774
775 binfo.flags = flags;
776 binfo.size = nvgpu_os_buf_get_size(os_buf);
777 binfo.compr_kind = (vm->enable_ctag && compr_kind != NV_KIND_INVALID ?
778 compr_kind : NV_KIND_INVALID);
779 binfo.incompr_kind = incompr_kind;
780
781 if (compr_kind != NV_KIND_INVALID)
782 map_key_kind = compr_kind;
783 else
784 map_key_kind = incompr_kind;
785
786 /*
787 * Check if this buffer is already mapped.
788 */
789 if (!vm->userspace_managed) {
790 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
791 mapped_buffer = nvgpu_vm_find_mapping(vm,
792 os_buf,
793 map_addr,
794 flags,
795 map_key_kind);
796 nvgpu_mutex_release(&vm->update_gmmu_lock);
797
798 if (mapped_buffer) {
799 nvgpu_ref_get(&mapped_buffer->ref);
800 return mapped_buffer;
801 }
802 }
803
804 /*
805 * Generate a new mapping!
806 */
807 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
808 if (!mapped_buffer) {
809 nvgpu_warn(g, "oom allocating tracking buffer");
810 return ERR_PTR(-ENOMEM);
811 }
812
813 align = nvgpu_sgt_alignment(g, sgt);
814 if (g->mm.disable_bigpage)
815 binfo.pgsz_idx = gmmu_page_size_small;
816 else
817 binfo.pgsz_idx = __get_pte_size(vm, map_addr,
818 min_t(u64, binfo.size, align));
819 map_size = map_size ? map_size : binfo.size;
820 map_size = ALIGN(map_size, SZ_4K);
821
822 if ((map_size > binfo.size) ||
823 (phys_offset > (binfo.size - map_size))) {
824 err = -EINVAL;
825 goto clean_up_nolock;
826 }
827
828 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
829
830 /*
831 * Check if we should use a fixed offset for mapping this buffer.
832 */
833 if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
834 err = nvgpu_vm_area_validate_buffer(vm,
835 map_addr,
836 map_size,
837 binfo.pgsz_idx,
838 &vm_area);
839 if (err)
840 goto clean_up;
841
842 va_allocated = false;
843 }
844
845 err = nvgpu_vm_compute_compression(vm, &binfo);
846 if (err) {
847 nvgpu_err(g, "failure setting up compression");
848 goto clean_up;
849 }
850
851 if (binfo.compr_kind != NV_KIND_INVALID) {
852 struct gk20a_comptags comptags = { 0 };
853
854 /*
855 * Get the comptags state, alloc if necessary
856 */
857 err = gk20a_alloc_or_get_comptags(g, os_buf,
858 &g->gr.comp_tags,
859 &comptags);
860 if (err) {
861 /*
862 * This is an irrecoverable failure and we need to
863 * abort. In particular, it is not safe to proceed with
 864			 * the incompressible fallback, since we cannot mark
 865			 * our alloc failure anywhere. Later we would retry
866 * allocation and break compressible map aliasing.
867 */
868 nvgpu_err(g, "Error %d setting up comptags", err);
869 goto clean_up;
870 }
871
872 /*
873 * Newly allocated comptags needs to be cleared
874 */
875 if (comptags.needs_clear) {
876 if (g->ops.ltc.cbc_ctrl) {
877 if (gk20a_comptags_start_clear(os_buf)) {
878 err = g->ops.ltc.cbc_ctrl(
879 g, gk20a_cbc_op_clear,
880 comptags.offset,
881 (comptags.offset +
882 comptags.lines - 1));
883 gk20a_comptags_finish_clear(
884 os_buf, err == 0);
885 if (err)
886 goto clean_up;
887 }
888 } else {
889 /*
890 * Cleared as part of gmmu map
891 */
892 clear_ctags = true;
893 }
894 }
895
896 /*
897 * Store the ctag offset for later use if we got the comptags
898 */
899 if (comptags.lines)
900 ctag_offset = comptags.offset;
901 }
902
903 /*
904 * Figure out the kind and ctag offset for the GMMU page tables
905 */
906 if (binfo.compr_kind != NV_KIND_INVALID && ctag_offset) {
907 /*
908 * Adjust the ctag_offset as per the buffer map offset
909 */
910 ctag_offset += phys_offset >>
911 ilog2(g->ops.fb.compression_page_size(g));
912 pte_kind = binfo.compr_kind;
913 } else if (binfo.incompr_kind != NV_KIND_INVALID) {
914 /*
915 * Incompressible kind, ctag offset will not be programmed
916 */
917 ctag_offset = 0;
918 pte_kind = binfo.incompr_kind;
919 } else {
920 /*
921 * Caller required compression, but we cannot provide it
922 */
923 nvgpu_err(g, "No comptags and no incompressible fallback kind");
924 err = -ENOMEM;
925 goto clean_up;
926 }
927
928 if (clear_ctags)
929 clear_ctags = gk20a_comptags_start_clear(os_buf);
930
931 map_addr = g->ops.mm.gmmu_map(vm,
932 map_addr,
933 sgt,
934 phys_offset,
935 map_size,
936 binfo.pgsz_idx,
937 pte_kind,
938 ctag_offset,
939 flags,
940 rw,
941 clear_ctags,
942 false,
943 false,
944 batch,
945 aperture);
946
947 if (clear_ctags)
948 gk20a_comptags_finish_clear(os_buf, map_addr != 0);
949
950 if (!map_addr) {
951 err = -ENOMEM;
952 goto clean_up;
953 }
954
955 nvgpu_init_list_node(&mapped_buffer->buffer_list);
956 nvgpu_ref_init(&mapped_buffer->ref);
957 mapped_buffer->addr = map_addr;
958 mapped_buffer->size = map_size;
959 mapped_buffer->pgsz_idx = binfo.pgsz_idx;
960 mapped_buffer->vm = vm;
961 mapped_buffer->flags = flags;
962 mapped_buffer->kind = map_key_kind;
963 mapped_buffer->va_allocated = va_allocated;
964 mapped_buffer->vm_area = vm_area;
965
966 err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
967 if (err) {
968 nvgpu_err(g, "failed to insert into mapped buffer tree");
969 goto clean_up;
970 }
971
972 vm->num_user_mapped_buffers++;
973
974 if (vm_area) {
975 nvgpu_list_add_tail(&mapped_buffer->buffer_list,
976 &vm_area->buffer_list_head);
977 mapped_buffer->vm_area = vm_area;
978 }
979
980 nvgpu_mutex_release(&vm->update_gmmu_lock);
981
982 return mapped_buffer;
983
984clean_up:
985 if (mapped_buffer->addr)
986 g->ops.mm.gmmu_unmap(vm,
987 mapped_buffer->addr,
988 mapped_buffer->size,
989 mapped_buffer->pgsz_idx,
990 mapped_buffer->va_allocated,
991 gk20a_mem_flag_none,
992 mapped_buffer->vm_area ?
993 mapped_buffer->vm_area->sparse : false,
994 NULL);
995 nvgpu_mutex_release(&vm->update_gmmu_lock);
996clean_up_nolock:
997 nvgpu_kfree(g, mapped_buffer);
998
999 return ERR_PTR(err);
1000}
1001
1002/*
1003 * Really unmap. This does the real GMMU unmap and removes the mapping from the
1004 * VM map tracking tree (and vm_area list if necessary).
1005 */
1006static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
1007 struct vm_gk20a_mapping_batch *batch)
1008{
1009 struct vm_gk20a *vm = mapped_buffer->vm;
1010 struct gk20a *g = vm->mm->g;
1011
1012 vm->num_user_mapped_buffers--;
1013
1014 g->ops.mm.gmmu_unmap(vm,
1015 mapped_buffer->addr,
1016 mapped_buffer->size,
1017 mapped_buffer->pgsz_idx,
1018 mapped_buffer->va_allocated,
1019 gk20a_mem_flag_none,
1020 mapped_buffer->vm_area ?
1021 mapped_buffer->vm_area->sparse : false,
1022 batch);
1023
1024 /*
1025 * Remove from mapped buffer tree. Then delete the buffer from the
1026 * linked list of mapped buffers; though note: not all mapped buffers
1027 * are part of a vm_area.
1028 */
1029 nvgpu_remove_mapped_buf(vm, mapped_buffer);
1030 nvgpu_list_del(&mapped_buffer->buffer_list);
1031
1032 /*
 1033	 * OS specific freeing. This is after the generic freeing in case the
1034 * generic freeing relies on some component of the OS specific
1035 * nvgpu_mapped_buf in some abstraction or the like.
1036 */
1037 nvgpu_vm_unmap_system(mapped_buffer);
1038
1039 nvgpu_kfree(g, mapped_buffer);
1040}
1041
1042void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref)
1043{
1044 struct nvgpu_mapped_buf *mapped_buffer =
1045 container_of(ref, struct nvgpu_mapped_buf, ref);
1046
1047 __nvgpu_vm_unmap(mapped_buffer, mapped_buffer->vm->kref_put_batch);
1048}
1049
1050/*
1051 * For fixed-offset buffers we must sync the buffer. That means we wait for the
1052 * buffer to hit a ref-count of 1 before proceeding.
1053 *
1054 * Note: this requires the update_gmmu_lock to be held since we release it and
 1055 * re-acquire it in this function.
1056 */
1057static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm,
1058 struct nvgpu_mapped_buf *mapped_buffer)
1059{
1060 struct nvgpu_timeout timeout;
1061 int ret = 0;
1062
1063 nvgpu_mutex_release(&vm->update_gmmu_lock);
1064
1065 /*
 1066	 * 500 ms timer.
1067 */
1068 nvgpu_timeout_init(vm->mm->g, &timeout, 50, NVGPU_TIMER_CPU_TIMER);
1069
1070 do {
1071 if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) == 1)
1072 break;
1073 nvgpu_msleep(10);
1074 } while (!nvgpu_timeout_expired_msg(&timeout,
1075 "sync-unmap failed on 0x%llx"));
1076
1077 if (nvgpu_timeout_expired(&timeout))
1078 ret = -ETIMEDOUT;
1079
1080 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1081
1082 return ret;
1083}
1084
1085void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
1086 struct vm_gk20a_mapping_batch *batch)
1087{
1088 struct nvgpu_mapped_buf *mapped_buffer;
1089
1090 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1091
1092 mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
1093 if (!mapped_buffer)
1094 goto done;
1095
1096 if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1097 if (nvgpu_vm_unmap_sync_buffer(vm, mapped_buffer))
1098 /*
1099 * Looks like we have failed... Better not continue in
1100 * case the buffer is in use.
1101 */
1102 goto done;
1103 }
1104
1105 /*
1106 * Make sure we have access to the batch if we end up calling through to
1107 * the unmap_ref function.
1108 */
1109 vm->kref_put_batch = batch;
1110 nvgpu_ref_put(&mapped_buffer->ref, __nvgpu_vm_unmap_ref);
1111 vm->kref_put_batch = NULL;
1112
1113done:
1114 nvgpu_mutex_release(&vm->update_gmmu_lock);
1115 return;
1116}
1117
1118static int nvgpu_vm_compute_compression(struct vm_gk20a *vm,
1119 struct nvgpu_ctag_buffer_info *binfo)
1120{
1121 bool kind_compressible = (binfo->compr_kind != NV_KIND_INVALID);
1122 struct gk20a *g = gk20a_from_vm(vm);
1123
1124 if (kind_compressible &&
1125 vm->gmmu_page_sizes[binfo->pgsz_idx] <
1126 g->ops.fb.compressible_page_size(g)) {
1127 /*
1128 * Let's double check that there is a fallback kind
1129 */
1130 if (binfo->incompr_kind == NV_KIND_INVALID) {
1131 nvgpu_err(g,
1132 "Unsupported page size for compressible "
1133 "kind, but no fallback kind");
1134 return -EINVAL;
1135 } else {
1136 nvgpu_log(g, gpu_dbg_map,
1137 "Unsupported page size for compressible "
1138 "kind, demoting to incompressible");
1139 binfo->compr_kind = NV_KIND_INVALID;
1140 kind_compressible = false;
1141 }
1142 }
1143
1144 return 0;
1145}
diff --git a/drivers/gpu/nvgpu/common/mm/vm_area.c b/drivers/gpu/nvgpu/common/mm/vm_area.c
new file mode 100644
index 00000000..b6286c43
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/vm_area.c
@@ -0,0 +1,231 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <uapi/linux/nvgpu.h>
24
25#include <nvgpu/vm.h>
26#include <nvgpu/vm_area.h>
27
28#include "gk20a/gk20a.h"
29#include "gk20a/mm_gk20a.h"
30
31struct nvgpu_vm_area *nvgpu_vm_area_find(struct vm_gk20a *vm, u64 addr)
32{
33 struct nvgpu_vm_area *vm_area;
34
35 nvgpu_list_for_each_entry(vm_area, &vm->vm_area_list,
36 nvgpu_vm_area, vm_area_list) {
37 if (addr >= vm_area->addr &&
38 addr < (u64)vm_area->addr + (u64)vm_area->size)
39 return vm_area;
40 }
41
42 return NULL;
43}
44
45int nvgpu_vm_area_validate_buffer(struct vm_gk20a *vm,
46 u64 map_addr, u64 map_size, int pgsz_idx,
47 struct nvgpu_vm_area **pvm_area)
48{
49 struct gk20a *g = vm->mm->g;
50 struct nvgpu_vm_area *vm_area;
51 struct nvgpu_mapped_buf *buffer;
52 u64 map_end = map_addr + map_size;
53
54 /* can wrap around with insane map_size; zero is disallowed too */
55 if (map_end <= map_addr) {
56 nvgpu_warn(g, "fixed offset mapping with invalid map_size");
57 return -EINVAL;
58 }
59
60 if (map_addr & (vm->gmmu_page_sizes[pgsz_idx] - 1)) {
61 nvgpu_err(g, "map offset must be buffer page size aligned 0x%llx",
62 map_addr);
63 return -EINVAL;
64 }
65
66 /* Find the space reservation, but it's ok to have none for
67 * userspace-managed address spaces */
68 vm_area = nvgpu_vm_area_find(vm, map_addr);
69 if (!vm_area && !vm->userspace_managed) {
70 nvgpu_warn(g, "fixed offset mapping without space allocation");
71 return -EINVAL;
72 }
73
74 /* Mapped area should fit inside va, if there's one */
75 if (vm_area && map_end > vm_area->addr + vm_area->size) {
76 nvgpu_warn(g, "fixed offset mapping size overflows va node");
77 return -EINVAL;
78 }
79
80 /* check that this mapping does not collide with existing
81 * mappings by checking the buffer with the highest GPU VA
82 * that is less than our buffer end */
83 buffer = __nvgpu_vm_find_mapped_buf_less_than(
84 vm, map_addr + map_size);
85 if (buffer && buffer->addr + buffer->size > map_addr) {
86 nvgpu_warn(g, "overlapping buffer map requested");
87 return -EINVAL;
88 }
89
90 *pvm_area = vm_area;
91
92 return 0;
93}
94
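/*
 * Illustrative flow (sketch only): reserve a chunk of GPU VA, map buffers at
 * fixed offsets inside it, then tear the reservation down:
 *
 *	u64 addr = 0;
 *
 *	nvgpu_vm_area_alloc(vm, pages, page_size, &addr, 0);
 *	... fixed-offset nvgpu_vm_map() calls landing inside the area ...
 *	nvgpu_vm_area_free(vm, addr);
 */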
95int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size,
96 u64 *addr, u32 flags)
97{
98 struct gk20a *g = vm->mm->g;
99 struct nvgpu_allocator *vma;
100 struct nvgpu_vm_area *vm_area;
101 u64 vaddr_start = 0;
102 int pgsz_idx = gmmu_page_size_small;
103
104 nvgpu_log(g, gpu_dbg_map,
105 "ADD vm_area: pgsz=%#-8x pages=%-9u addr=%#-14llx flags=0x%x",
106 page_size, pages, *addr, flags);
107
108 for (; pgsz_idx < gmmu_nr_page_sizes; pgsz_idx++) {
109 if (vm->gmmu_page_sizes[pgsz_idx] == page_size)
110 break;
111 }
112
113 if (pgsz_idx > gmmu_page_size_big)
114 return -EINVAL;
115
116 if (!vm->big_pages && pgsz_idx == gmmu_page_size_big)
117 return -EINVAL;
118
119 vm_area = nvgpu_kzalloc(g, sizeof(*vm_area));
120 if (!vm_area)
121 goto clean_up_err;
122
123 vma = vm->vma[pgsz_idx];
124 if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
125 vaddr_start = nvgpu_alloc_fixed(vma, *addr,
126 (u64)pages *
127 (u64)page_size,
128 page_size);
129 else
130 vaddr_start = nvgpu_alloc(vma,
131 (u64)pages *
132 (u64)page_size);
133
134 if (!vaddr_start)
135 goto clean_up_err;
136
137 vm_area->flags = flags;
138 vm_area->addr = vaddr_start;
139 vm_area->size = (u64)page_size * (u64)pages;
140 vm_area->pgsz_idx = pgsz_idx;
141 nvgpu_init_list_node(&vm_area->buffer_list_head);
142 nvgpu_init_list_node(&vm_area->vm_area_list);
143
144 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
145
146 if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) {
147 u64 map_addr = g->ops.mm.gmmu_map(vm, vaddr_start,
148 NULL,
149 0,
150 vm_area->size,
151 pgsz_idx,
152 0,
153 0,
154 flags,
155 gk20a_mem_flag_none,
156 false,
157 true,
158 false,
159 NULL,
160 APERTURE_INVALID);
161 if (!map_addr) {
162 nvgpu_mutex_release(&vm->update_gmmu_lock);
163 goto clean_up_err;
164 }
165
166 vm_area->sparse = true;
167 }
168 nvgpu_list_add_tail(&vm_area->vm_area_list, &vm->vm_area_list);
169
170 nvgpu_mutex_release(&vm->update_gmmu_lock);
171
172 *addr = vaddr_start;
173 return 0;
174
175clean_up_err:
176 if (vaddr_start)
177 nvgpu_free(vma, vaddr_start);
178 if (vm_area)
179 nvgpu_kfree(g, vm_area);
180 return -ENOMEM;
181}
182
183int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr)
184{
185 struct gk20a *g = gk20a_from_vm(vm);
186 struct nvgpu_mapped_buf *buffer, *n;
187 struct nvgpu_vm_area *vm_area;
188
189 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
190 vm_area = nvgpu_vm_area_find(vm, addr);
191 if (!vm_area) {
192 nvgpu_mutex_release(&vm->update_gmmu_lock);
193 return 0;
194 }
195 nvgpu_list_del(&vm_area->vm_area_list);
196 nvgpu_mutex_release(&vm->update_gmmu_lock);
197
198 nvgpu_log(g, gpu_dbg_map,
199 "DEL vm_area: pgsz=%#-8x pages=%-9llu "
200 "addr=%#-14llx flags=0x%x",
201 vm->gmmu_page_sizes[vm_area->pgsz_idx],
202 vm_area->size / vm->gmmu_page_sizes[vm_area->pgsz_idx],
203 vm_area->addr,
204 vm_area->flags);
205
206 /* Decrement the ref count on all buffers in this vm_area. This
207 * allows userspace to let the kernel free mappings that are
208 * only used by this vm_area. */
209 nvgpu_list_for_each_entry_safe(buffer, n,
210 &vm_area->buffer_list_head,
211 nvgpu_mapped_buf, buffer_list) {
212 nvgpu_list_del(&buffer->buffer_list);
213 nvgpu_ref_put(&buffer->ref, __nvgpu_vm_unmap_ref);
214 }
215
216 /* if this was a sparse mapping, free the va */
217 if (vm_area->sparse)
218 g->ops.mm.gmmu_unmap(vm,
219 vm_area->addr,
220 vm_area->size,
221 vm_area->pgsz_idx,
222 true,
223 gk20a_mem_flag_none,
224 true,
225 NULL);
226
227 nvgpu_free(vm->vma[vm_area->pgsz_idx], vm_area->addr);
228 nvgpu_kfree(g, vm_area);
229
230 return 0;
231}