Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c    | 114
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c           |  25
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c            | 109
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_mem.c       |  73
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c  | 142
-rw-r--r--  drivers/gpu/nvgpu/common/pramin.c             |  27
6 files changed, 345 insertions(+), 145 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index e4991d0d..eb54f3fd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -21,6 +21,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/kmem.h>
 
 #include <nvgpu/linux/dma.h>
 
@@ -395,3 +396,116 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
 
 	return 0;
 }
+
+static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g,
+						 struct nvgpu_mem_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *head, *next;
+
+	head = nvgpu_kzalloc(g, sizeof(*sgl));
+	if (!head)
+		return NULL;
+
+	next = head;
+	while (true) {
+		nvgpu_log(g, gpu_dbg_sgl,
+			  "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
+			  sgl->phys, sgl->dma, sgl->length);
+
+		next->dma = sgl->dma;
+		next->phys = sgl->phys;
+		next->length = sgl->length;
+		next->next = NULL;
+
+		sgl = nvgpu_mem_sgl_next(sgl);
+		if (!sgl)
+			break;
+
+		next->next = nvgpu_kzalloc(g, sizeof(*sgl));
+		if (!next->next) {
+			nvgpu_mem_sgl_free(g, head);
+			return NULL;
+		}
+		next = next->next;
+	}
+
+	return head;
+}
+
+static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
+	struct gk20a *g,
+	struct scatterlist *linux_sgl)
+{
+	struct nvgpu_page_alloc *vidmem_alloc;
+
+	vidmem_alloc = get_vidmem_page_alloc(linux_sgl);
+	if (!vidmem_alloc)
+		return NULL;
+
+	nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:");
+
+	return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
+}
+
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
+					   struct sg_table *sgt)
+{
+	struct nvgpu_mem_sgl *head, *sgl, *next;
+	struct scatterlist *linux_sgl = sgt->sgl;
+
+	if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
+		return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl);
+
+	head = nvgpu_kzalloc(g, sizeof(*sgl));
+	if (!head)
+		return NULL;
+
+	nvgpu_log(g, gpu_dbg_sgl, "Making sgl:");
+
+	sgl = head;
+	while (true) {
+		sgl->dma = sg_dma_address(linux_sgl);
+		sgl->phys = sg_phys(linux_sgl);
+		sgl->length = linux_sgl->length;
+
+		/*
+		 * We don't like offsets in the pages here. This will cause
+		 * problems.
+		 */
+		if (WARN_ON(linux_sgl->offset)) {
+			nvgpu_mem_sgl_free(g, head);
+			return NULL;
+		}
+
+		nvgpu_log(g, gpu_dbg_sgl,
+			  "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
+			  sgl->phys, sgl->dma, sgl->length);
+
+		/*
+		 * When there are no more SGL ents for the Linux SGL we are
+		 * done. Don't bother making any more SGL ents for the nvgpu
+		 * SGL.
+		 */
+		linux_sgl = sg_next(linux_sgl);
+		if (!linux_sgl)
+			break;
+
+		next = nvgpu_kzalloc(g, sizeof(*sgl));
+		if (!next) {
+			nvgpu_mem_sgl_free(g, head);
+			return NULL;
+		}
+
+		sgl->next = next;
+		sgl = next;
+	}
+
+	nvgpu_log(g, gpu_dbg_sgl, "Done!");
+	return head;
+}
+
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
+						    struct nvgpu_mem *mem)
+{
+	return nvgpu_mem_sgl_create(g, mem->priv.sgt);
+}
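Note: the nvgpu_mem_sgl node itself is declared in a header that is not part of this diff. A minimal sketch of the struct these routines assume, inferred only from the fields they touch above (the authoritative definition lives in the nvgpu_mem headers):

struct nvgpu_mem_sgl {
	/* Singly linked list of discontiguous chunks. */
	struct nvgpu_mem_sgl	*next;

	u64			 phys;		/* Physical address of the chunk. */
	u64			 dma;		/* IOMMU (DMA) address; 0 when unmapped. */
	u64			 length;	/* Chunk length in bytes. */
};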
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 86d8bec9..4a4429dc 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -21,8 +21,11 @@
 #include <nvgpu/lock.h>
 #include <nvgpu/rbtree.h>
 #include <nvgpu/vm_area.h>
+#include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/page_allocator.h>
 
+#include <nvgpu/linux/nvgpu_mem.h>
+
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 #include "gk20a/kind_gk20a.h"
@@ -66,17 +69,19 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
 
 	if (aperture == APERTURE_VIDMEM) {
 		struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
-		struct page_alloc_chunk *chunk = NULL;
+		struct nvgpu_mem_sgl *sgl_vid = alloc->sgl;
 
-		nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-					  page_alloc_chunk, list_entry) {
-			chunk_align = 1ULL << __ffs(chunk->base |
-						    chunk->length);
+		while (sgl_vid) {
+			chunk_align = 1ULL <<
+				__ffs(nvgpu_mem_sgl_phys(sgl_vid) |
+				      nvgpu_mem_sgl_length(sgl_vid));
 
 			if (align)
 				align = min(align, chunk_align);
 			else
 				align = chunk_align;
+
+			sgl_vid = nvgpu_mem_sgl_next(sgl_vid);
 		}
 
 		return align;
@@ -237,6 +242,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	struct nvgpu_vm_area *vm_area = NULL;
 	u32 ctag_offset;
 	enum nvgpu_aperture aperture;
+	struct nvgpu_mem_sgl *nvgpu_sgl;
 
 	/*
 	 * The kind used as part of the key for map caching. HW may
@@ -393,9 +399,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 		ctag_offset += buffer_offset >>
 			       ilog2(g->ops.fb.compression_page_size(g));
 
+	nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt);
+
 	/* update gmmu ptes */
-	map_offset = g->ops.mm.gmmu_map(vm, map_offset,
-					bfr.sgt,
+	map_offset = g->ops.mm.gmmu_map(vm,
+					map_offset,
+					nvgpu_sgl,
 					buffer_offset, /* sg offset */
 					mapping_size,
 					bfr.pgsz_idx,
@@ -410,6 +419,8 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	if (!map_offset)
 		goto clean_up;
 
+	nvgpu_mem_sgl_free(g, nvgpu_sgl);
+
 	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
 	if (!mapped_buffer) {
 		nvgpu_warn(g, "oom allocating tracking buffer");
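Note: the rewritten alignment loop preserves the old math. A chunk honors every power-of-two alignment that divides both its base address and its length, so 1ULL << __ffs(base | length) is the largest such alignment, and the buffer-wide value is the minimum over all chunks. A standalone illustration with hypothetical values (user-space C, not driver code):

#include <stdio.h>
#include <stdint.h>

/*
 * Largest power of two dividing both base and length; equivalent to
 * the kernel's 1ULL << __ffs(base | length).
 */
static uint64_t chunk_alignment(uint64_t base, uint64_t length)
{
	uint64_t bits = base | length;

	return bits & ~(bits - 1);
}

int main(void)
{
	/* A chunk at 0x201000 of length 0x3000 supports 4K alignment. */
	printf("align = 0x%llx\n",
	       (unsigned long long)chunk_alignment(0x201000, 0x3000));
	return 0;
}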
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 7f486d68..41f5acdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -65,11 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 vaddr;
 
-	struct sg_table *sgt = mem->priv.sgt;
+	struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem);
+
+	if (!sgl)
+		return -ENOMEM;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 	vaddr = g->ops.mm.gmmu_map(vm, addr,
-				   sgt,    /* sg table */
+				   sgl,    /* sg list */
 				   0,      /* sg offset */
 				   size,
 				   gmmu_page_size_kernel,
@@ -82,8 +85,11 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 				   NULL,   /* mapping_batch handle */
 				   aperture);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	nvgpu_mem_sgl_free(g, sgl);
+
 	if (!vaddr) {
-		nvgpu_err(g, "failed to allocate va space");
+		nvgpu_err(g, "failed to map buffer!");
 		return 0;
 	}
 
@@ -91,7 +97,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 }
 
 /*
- * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings.
+ * Map a nvgpu_mem into the GMMU. This is for kernel space to use.
  */
 u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
 		   struct nvgpu_mem *mem,
@@ -106,7 +112,7 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
 }
 
 /*
- * Like nvgpu_gmmu_map() except it can work on a fixed address instead.
+ * Like nvgpu_gmmu_map() except this can work on a fixed address.
  */
 u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
 			 struct nvgpu_mem *mem,
@@ -407,7 +413,7 @@ static int __set_pd_level(struct vm_gk20a *vm,
 	 */
 	target_addr = next_pd ?
 		nvgpu_pde_phys_addr(g, next_pd) :
-		g->ops.mm.gpu_phys_addr(g, attrs, phys_addr);
+		phys_addr;
 
 	l->update_entry(vm, l,
 			pd, pd_idx,
@@ -458,18 +464,16 @@
  * VIDMEM version of the update_ptes logic.
  */
 static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
-						 struct sg_table *sgt,
+						 struct nvgpu_mem_sgl *sgl,
 						 u64 space_to_skip,
 						 u64 virt_addr,
 						 u64 length,
 						 struct nvgpu_gmmu_attrs *attrs)
 {
-	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
 	u64 phys_addr, chunk_length;
 	int err = 0;
 
-	if (!sgt) {
+	if (!sgl) {
 		/*
 		 * This is considered an unmap. Just pass in 0 as the physical
 		 * address for the entire GPU range.
@@ -482,22 +486,21 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
 		return err;
 	}
 
-	alloc = get_vidmem_page_alloc(sgt->sgl);
-
 	/*
 	 * Otherwise iterate across all the chunks in this allocation and
 	 * map them.
 	 */
-	nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-				  page_alloc_chunk, list_entry) {
+	while (sgl) {
 		if (space_to_skip &&
-		    space_to_skip >= chunk->length) {
-			space_to_skip -= chunk->length;
+		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
+			space_to_skip -= nvgpu_mem_sgl_length(sgl);
+			sgl = nvgpu_mem_sgl_next(sgl);
 			continue;
 		}
 
-		phys_addr = chunk->base + space_to_skip;
-		chunk_length = min(length, (chunk->length - space_to_skip));
+		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+		chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) -
+					    space_to_skip));
 
 		err = __set_pd_level(vm, &vm->pdb,
 				     0,
@@ -518,23 +521,24 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
 
 		if (length == 0)
 			break;
+
+		sgl = nvgpu_mem_sgl_next(sgl);
 	}
 
 	return err;
 }
 
 static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
-						 struct sg_table *sgt,
+						 struct nvgpu_mem_sgl *sgl,
 						 u64 space_to_skip,
 						 u64 virt_addr,
 						 u64 length,
 						 struct nvgpu_gmmu_attrs *attrs)
 {
 	int err;
-	struct scatterlist *sgl;
 	struct gk20a *g = gk20a_from_vm(vm);
 
-	if (!sgt) {
+	if (!sgl) {
 		/*
 		 * This is considered an unmap. Just pass in 0 as the physical
 		 * address for the entire GPU range.
@@ -548,19 +552,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
 	}
 
 	/*
-	 * At this point we have a Linux scatter-gather list pointing to some
-	 * number of discontiguous chunks of memory. Iterate over that list and
-	 * generate a GMMU map call for each chunk. There are two possibilities:
-	 * either the IOMMU is enabled or not. When the IOMMU is enabled the
-	 * mapping is simple since the "physical" address is actually a virtual
-	 * IO address and will be contiguous. The no-IOMMU case is more
-	 * complicated. We will have to iterate over the SGT and do a separate
-	 * map for each chunk of the SGT.
+	 * At this point we have a scatter-gather list pointing to some number
+	 * of discontiguous chunks of memory. We must iterate over that list and
+	 * generate a GMMU map call for each chunk. There are two possibilities:
+	 * either an IOMMU is enabled or not. When an IOMMU is enabled the
+	 * mapping is simple since the "physical" address is actually a virtual
+	 * IO address and will be contiguous.
 	 */
-	sgl = sgt->sgl;
-
 	if (!g->mm.bypass_smmu) {
-		u64 io_addr = nvgpu_mem_get_addr_sgl(g, sgl);
+		u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs);
 
 		io_addr += space_to_skip;
 
@@ -585,14 +585,16 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
 		/*
 		 * Cut out sgl ents for space_to_skip.
 		 */
-		if (space_to_skip && space_to_skip >= sgl->length) {
-			space_to_skip -= sgl->length;
-			sgl = sg_next(sgl);
+		if (space_to_skip &&
+		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
+			space_to_skip -= nvgpu_mem_sgl_length(sgl);
+			sgl = nvgpu_mem_sgl_next(sgl);
 			continue;
 		}
 
-		phys_addr = sg_phys(sgl) + space_to_skip;
-		chunk_length = min(length, sgl->length - space_to_skip);
+		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+		chunk_length = min(length,
+				   nvgpu_mem_sgl_length(sgl) - space_to_skip);
 
 		err = __set_pd_level(vm, &vm->pdb,
 				     0,
@@ -600,13 +602,11 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
 				     virt_addr,
 				     chunk_length,
 				     attrs);
-		if (err)
-			return err;
 
 		space_to_skip = 0;
 		virt_addr += chunk_length;
 		length -= chunk_length;
-		sgl = sg_next(sgl);
+		sgl = nvgpu_mem_sgl_next(sgl);
 
 		if (length == 0)
 			break;
@@ -624,22 +624,20 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
  * implementations. But the logic around that is generic to all chips. Every
  * chip has some number of PDE levels and then a PTE level.
  *
- * Each chunk of the incoming SGT is sent to the chip specific implementation
+ * Each chunk of the incoming SGL is sent to the chip specific implementation
  * of page table update.
 *
 * [*] Note: the "physical" address may actually be an IO virtual address in the
 *     case of SMMU usage.
 */
 static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
-					  struct sg_table *sgt,
+					  struct nvgpu_mem_sgl *sgl,
 					  u64 space_to_skip,
 					  u64 virt_addr,
 					  u64 length,
 					  struct nvgpu_gmmu_attrs *attrs)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
-	struct nvgpu_page_alloc *alloc;
-	u64 phys_addr = 0;
 	u32 page_size;
 	int err;
 
@@ -665,25 +663,16 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 		return err;
 	}
 
-	if (sgt) {
-		if (attrs->aperture == APERTURE_VIDMEM) {
-			alloc = get_vidmem_page_alloc(sgt->sgl);
-
-			phys_addr = alloc->base;
-		} else
-			phys_addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl);
-	}
-
 	__gmmu_dbg(g, attrs,
 		   "vm=%s "
 		   "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx "
 		   "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
 		   "kind=%#02x APT=%-6s %c%c%c%c%c",
 		   vm->name,
-		   sgt ? "MAP" : "UNMAP",
+		   sgl ? "MAP" : "UNMAP",
 		   virt_addr,
 		   length,
-		   phys_addr,
+		   sgl ? nvgpu_mem_sgl_phys(sgl) : 0,
 		   space_to_skip,
 		   page_size >> 10,
 		   nvgpu_gmmu_perm_str(attrs->rw_flag),
@@ -696,19 +685,19 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 		   attrs->valid ? 'V' : '-');
 
 	/*
-	 * Handle VIDMEM progamming. Currently uses a different scatter list
-	 * format.
+	 * For historical reasons these are separate, but soon these will be
+	 * unified.
 	 */
 	if (attrs->aperture == APERTURE_VIDMEM)
 		err = __nvgpu_gmmu_update_page_table_vidmem(vm,
-							    sgt,
+							    sgl,
 							    space_to_skip,
 							    virt_addr,
 							    length,
 							    attrs);
 	else
 		err = __nvgpu_gmmu_update_page_table_sysmem(vm,
-							    sgt,
+							    sgl,
 							    space_to_skip,
 							    virt_addr,
 							    length,
@@ -717,7 +706,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 	unmap_gmmu_pages(g, &vm->pdb);
 	nvgpu_smp_mb();
 
-	__gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");
+	__gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP");
 
 	return err;
 }
@@ -736,7 +725,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 */
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			  u64 vaddr,
-			  struct sg_table *sgt,
+			  struct nvgpu_mem_sgl *sgl,
 			  u64 buffer_offset,
 			  u64 size,
 			  int pgsz_idx,
@@ -785,7 +774,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		allocated = true;
 	}
 
-	err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset,
+	err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset,
 					     vaddr, size, &attrs);
 	if (err) {
 		nvgpu_err(g, "failed to update ptes on map");
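Note: after this change the vidmem and sysmem walkers share the same chunk-skipping pattern: whole chunks are consumed while space_to_skip still covers them, then each remaining chunk is mapped, with the residual offset applied only to the first mapped chunk. A condensed sketch of that shared loop (names mirror the diff; this is not the literal driver code):

while (sgl) {
	/* Skip whole chunks that fall entirely inside space_to_skip. */
	if (space_to_skip && space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
		space_to_skip -= nvgpu_mem_sgl_length(sgl);
		sgl = nvgpu_mem_sgl_next(sgl);
		continue;
	}

	phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
	chunk_length = min(length, nvgpu_mem_sgl_length(sgl) - space_to_skip);

	/* ... __set_pd_level() programs this chunk's PTEs ... */

	space_to_skip = 0;	/* Only the first mapped chunk is offset. */
	virt_addr += chunk_length;
	length -= chunk_length;
	sgl = nvgpu_mem_sgl_next(sgl);

	if (length == 0)
		break;
}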
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
new file mode 100644
index 00000000..7296c673
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <nvgpu/kmem.h>
+#include <nvgpu/nvgpu_mem.h>
+
+#include "gk20a/gk20a.h"
+
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl)
+{
+	return sgl->next;
+}
+
+u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl)
+{
+	return sgl->phys;
+}
+
+u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl)
+{
+	return sgl->dma;
+}
+
+u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl)
+{
+	return sgl->length;
+}
+
+/*
+ * This builds a GPU address for the %sgl based on whether an IOMMU is present
+ * or not. It also handles turning the physical address into the true GPU
+ * physical address that should be programmed into the page tables.
+ */
+u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
+			   struct nvgpu_gmmu_attrs *attrs)
+{
+	if (nvgpu_mem_sgl_dma(sgl) == 0)
+		return g->ops.mm.gpu_phys_addr(g, attrs,
+					       nvgpu_mem_sgl_phys(sgl));
+
+	if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE)
+		return 0;
+
+	return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl));
+}
+
+void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *next;
+
+	/*
+	 * Free each of the elements. We expect each element to have been
+	 * nvgpu_k[mz]alloc()ed.
+	 */
+	while (sgl) {
+		next = nvgpu_mem_sgl_next(sgl);
+		nvgpu_kfree(g, sgl);
+		sgl = next;
+	}
+}
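Note: code outside this file is expected to go through the accessors above rather than touching struct fields directly. A hypothetical debug helper (not part of the patch) showing the intended read-only usage of the API:

static void dump_sgl(struct gk20a *g, struct nvgpu_mem_sgl *sgl)
{
	/* Walk the list via the accessor functions only. */
	while (sgl) {
		nvgpu_log(g, gpu_dbg_sgl,
			  "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
			  nvgpu_mem_sgl_phys(sgl),
			  nvgpu_mem_sgl_dma(sgl),
			  nvgpu_mem_sgl_length(sgl));
		sgl = nvgpu_mem_sgl_next(sgl);
	}
}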
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 72ff8f2d..6d92b457 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -147,19 +147,16 @@ static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
 			       struct nvgpu_page_alloc *alloc,
 			       bool free_buddy_alloc)
 {
-	struct page_alloc_chunk *chunk;
+	struct nvgpu_mem_sgl *sgl = alloc->sgl;
 
-	while (!nvgpu_list_empty(&alloc->alloc_chunks)) {
-		chunk = nvgpu_list_first_entry(&alloc->alloc_chunks,
-					       page_alloc_chunk,
-					       list_entry);
-		nvgpu_list_del(&chunk->list_entry);
-
-		if (free_buddy_alloc)
-			nvgpu_free(&a->source_allocator, chunk->base);
-		nvgpu_kmem_cache_free(a->chunk_cache, chunk);
+	if (free_buddy_alloc) {
+		while (sgl) {
+			nvgpu_free(&a->source_allocator, sgl->phys);
+			sgl = nvgpu_mem_sgl_next(sgl);
+		}
 	}
 
+	nvgpu_mem_sgl_free(a->owner->g, alloc->sgl);
 	nvgpu_kmem_cache_free(a->alloc_cache, alloc);
 }
 
@@ -243,15 +240,14 @@ static void free_slab_page(struct nvgpu_page_allocator *a,
 }
 
 /*
- * This expects @alloc to have 1 empty page_alloc_chunk already added to the
- * alloc_chunks list.
+ * This expects @alloc to have 1 empty sgl_entry ready for usage.
 */
 static int __do_slab_alloc(struct nvgpu_page_allocator *a,
 			   struct page_alloc_slab *slab,
 			   struct nvgpu_page_alloc *alloc)
 {
 	struct page_alloc_slab_page *slab_page = NULL;
-	struct page_alloc_chunk *chunk;
+	struct nvgpu_mem_sgl *sgl;
 	unsigned long offs;
 
 	/*
@@ -302,18 +298,19 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a,
 		BUG(); /* Should be impossible to hit this. */
 
 	/*
-	 * Handle building the nvgpu_page_alloc struct. We expect one
-	 * page_alloc_chunk to be present.
+	 * Handle building the nvgpu_page_alloc struct. We expect one sgl
+	 * to be present.
 	 */
 	alloc->slab_page = slab_page;
 	alloc->nr_chunks = 1;
 	alloc->length = slab_page->slab_size;
 	alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
 
-	chunk = nvgpu_list_first_entry(&alloc->alloc_chunks,
-				       page_alloc_chunk, list_entry);
-	chunk->base = alloc->base;
-	chunk->length = alloc->length;
+	sgl = alloc->sgl;
+	sgl->phys = alloc->base;
+	sgl->dma = alloc->base;
+	sgl->length = alloc->length;
+	sgl->next = NULL;
 
 	return 0;
 }
@@ -327,7 +324,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
 	int err, slab_nr;
 	struct page_alloc_slab *slab;
 	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
+	struct nvgpu_mem_sgl *sgl = NULL;
 
 	/*
 	 * Align the length to a page and then divide by the page size (4k for
@@ -341,15 +338,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
 		palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
 		goto fail;
 	}
-	chunk = nvgpu_kmem_cache_alloc(a->chunk_cache);
-	if (!chunk) {
-		palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
+	sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
+	if (!sgl) {
+		palloc_dbg(a, "OOM: could not alloc sgl struct!\n");
 		goto fail;
 	}
 
-	nvgpu_init_list_node(&alloc->alloc_chunks);
-	nvgpu_list_add(&chunk->list_entry, &alloc->alloc_chunks);
-
+	alloc->sgl = sgl;
 	err = __do_slab_alloc(a, slab, alloc);
 	if (err)
 		goto fail;
@@ -363,8 +358,8 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
 fail:
 	if (alloc)
 		nvgpu_kmem_cache_free(a->alloc_cache, alloc);
-	if (chunk)
-		nvgpu_kmem_cache_free(a->chunk_cache, chunk);
+	if (sgl)
+		nvgpu_kfree(a->owner->g, sgl);
 	return NULL;
 }
 
@@ -426,7 +421,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 	struct nvgpu_page_allocator *a, u64 pages)
 {
 	struct nvgpu_page_alloc *alloc;
-	struct page_alloc_chunk *c;
+	struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL;
 	u64 max_chunk_len = pages << a->page_shift;
 	int i = 0;
 
@@ -436,7 +431,6 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 
 	memset(alloc, 0, sizeof(*alloc));
 
-	nvgpu_init_list_node(&alloc->alloc_chunks);
 	alloc->length = pages << a->page_shift;
 
 	while (pages) {
@@ -482,36 +476,48 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 			goto fail_cleanup;
 		}
 
-		c = nvgpu_kmem_cache_alloc(a->chunk_cache);
-		if (!c) {
+		sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
+		if (!sgl) {
 			nvgpu_free(&a->source_allocator, chunk_addr);
 			goto fail_cleanup;
 		}
 
 		pages -= chunk_pages;
 
-		c->base = chunk_addr;
-		c->length = chunk_len;
-		nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks);
+		sgl->phys = chunk_addr;
+		sgl->dma = chunk_addr;
+		sgl->length = chunk_len;
+
+		/*
+		 * Build the singly linked list with a head node that is part of
+		 * the list.
+		 */
+		if (prev_sgl)
+			prev_sgl->next = sgl;
+		else
+			alloc->sgl = sgl;
+
+		prev_sgl = sgl;
 
 		i++;
 	}
 
 	alloc->nr_chunks = i;
-	c = nvgpu_list_first_entry(&alloc->alloc_chunks,
-				   page_alloc_chunk, list_entry);
-	alloc->base = c->base;
+	alloc->base = alloc->sgl->phys;
 
 	return alloc;
 
 fail_cleanup:
-	while (!nvgpu_list_empty(&alloc->alloc_chunks)) {
-		c = nvgpu_list_first_entry(&alloc->alloc_chunks,
-					   page_alloc_chunk, list_entry);
-		nvgpu_list_del(&c->list_entry);
-		nvgpu_free(&a->source_allocator, c->base);
-		nvgpu_kmem_cache_free(a->chunk_cache, c);
+	sgl = alloc->sgl;
+	while (sgl) {
+		struct nvgpu_mem_sgl *next = sgl->next;
+
+		nvgpu_free(&a->source_allocator, sgl->phys);
+		nvgpu_kfree(a->owner->g, sgl);
+
+		sgl = next;
 	}
+
 	nvgpu_kmem_cache_free(a->alloc_cache, alloc);
 fail:
 	return NULL;
@@ -521,7 +527,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
 	struct nvgpu_page_allocator *a, u64 len)
 {
 	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *c;
+	struct nvgpu_mem_sgl *sgl;
 	u64 pages;
 	int i = 0;
 
@@ -536,11 +542,15 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
 
 	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
 		   pages << a->page_shift, pages, alloc->base);
-	nvgpu_list_for_each_entry(c, &alloc->alloc_chunks,
-				  page_alloc_chunk, list_entry) {
+	sgl = alloc->sgl;
+	while (sgl) {
 		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
-			   i++, c->base, c->length);
+			   i++,
+			   nvgpu_mem_sgl_phys(sgl),
+			   nvgpu_mem_sgl_length(sgl));
+		sgl = sgl->next;
 	}
+	palloc_dbg(a, "Alloc done\n");
 
 	return alloc;
 }
@@ -638,11 +648,11 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
 	struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
 {
 	struct nvgpu_page_alloc *alloc;
-	struct page_alloc_chunk *c;
+	struct nvgpu_mem_sgl *sgl;
 
 	alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
-	c = nvgpu_kmem_cache_alloc(a->chunk_cache);
-	if (!alloc || !c)
+	sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
+	if (!alloc || !sgl)
 		goto fail;
 
 	alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
@@ -653,17 +663,18 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
 
 	alloc->nr_chunks = 1;
 	alloc->length = length;
-	nvgpu_init_list_node(&alloc->alloc_chunks);
+	alloc->sgl = sgl;
 
-	c->base = alloc->base;
-	c->length = length;
-	nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks);
+	sgl->phys = alloc->base;
+	sgl->dma = alloc->base;
+	sgl->length = length;
+	sgl->next = NULL;
 
 	return alloc;
 
 fail:
-	if (c)
-		nvgpu_kmem_cache_free(a->chunk_cache, c);
+	if (sgl)
+		nvgpu_kfree(a->owner->g, sgl);
 	if (alloc)
 		nvgpu_kmem_cache_free(a->alloc_cache, alloc);
 	return NULL;
@@ -677,7 +688,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 {
 	struct nvgpu_page_allocator *a = page_allocator(__a);
 	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *c;
+	struct nvgpu_mem_sgl *sgl;
 	u64 aligned_len, pages;
 	int i = 0;
 
@@ -697,10 +708,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 
 	palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
 		   alloc->base, aligned_len, pages);
-	nvgpu_list_for_each_entry(c, &alloc->alloc_chunks,
-				  page_alloc_chunk, list_entry) {
+	sgl = alloc->sgl;
+	while (sgl) {
 		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
-			   i++, c->base, c->length);
+			   i++,
+			   nvgpu_mem_sgl_phys(sgl),
+			   nvgpu_mem_sgl_length(sgl));
+		sgl = sgl->next;
 	}
 
 	a->nr_fixed_allocs++;
@@ -896,11 +910,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 
 	a->alloc_cache = nvgpu_kmem_cache_create(g,
 		sizeof(struct nvgpu_page_alloc));
-	a->chunk_cache = nvgpu_kmem_cache_create(g,
-		sizeof(struct page_alloc_chunk));
 	a->slab_page_cache = nvgpu_kmem_cache_create(g,
 		sizeof(struct page_alloc_slab_page));
-	if (!a->alloc_cache || !a->chunk_cache || !a->slab_page_cache) {
+	if (!a->alloc_cache || !a->slab_page_cache) {
 		err = -ENOMEM;
 		goto fail;
 	}
@@ -941,8 +953,6 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 fail:
 	if (a->alloc_cache)
 		nvgpu_kmem_cache_destroy(a->alloc_cache);
-	if (a->chunk_cache)
-		nvgpu_kmem_cache_destroy(a->chunk_cache);
	if (a->slab_page_cache)
 		nvgpu_kmem_cache_destroy(a->slab_page_cache);
 	nvgpu_kfree(g, a);
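Note: __do_nvgpu_alloc_pages() above builds the SGL with the classic head/prev pattern: the first node becomes alloc->sgl and each later node chains off the previous one, so no dummy head node is needed. An isolated sketch of the pattern, with hypothetical chunk_addr/chunk_len inputs standing in for the buddy allocator calls:

struct nvgpu_mem_sgl *head = NULL, *prev = NULL;
int i;

for (i = 0; i < nr_chunks; i++) {
	struct nvgpu_mem_sgl *node = nvgpu_kzalloc(g, sizeof(*node));

	if (!node)
		break;	/* The real code unwinds the whole list here. */

	node->phys = chunk_addr[i];
	node->dma = chunk_addr[i];
	node->length = chunk_len[i];

	if (prev)
		prev->next = node;	/* Append at the tail. */
	else
		head = node;		/* First node becomes the head. */
	prev = node;
}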
diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c
index 425bfdb4..bb7d930e 100644
--- a/drivers/gpu/nvgpu/common/pramin.c
+++ b/drivers/gpu/nvgpu/common/pramin.c
@@ -84,37 +84,40 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
 		u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
 {
 	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
+	struct nvgpu_mem_sgl *sgl;
 	u32 byteoff, start_reg, until_end, n;
 
 	alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
-	nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-				  page_alloc_chunk, list_entry) {
-		if (offset >= chunk->length)
-			offset -= chunk->length;
-		else
+	sgl = alloc->sgl;
+	while (sgl) {
+		if (offset >= nvgpu_mem_sgl_length(sgl)) {
+			offset -= nvgpu_mem_sgl_length(sgl);
+			sgl = sgl->next;
+		} else {
 			break;
+		}
 	}
 
 	while (size) {
-		byteoff = g->ops.pramin.enter(g, mem, chunk,
+		u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl);
+
+		byteoff = g->ops.pramin.enter(g, mem, sgl,
 					      offset / sizeof(u32));
 		start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32));
 		until_end = SZ_1M - (byteoff & (SZ_1M - 1));
 
-		n = min3(size, until_end, (u32)(chunk->length - offset));
+		n = min3(size, until_end, (u32)(sgl_len - offset));
 
 		loop(g, start_reg, n / sizeof(u32), arg);
 
 		/* read back to synchronize accesses */
 		gk20a_readl(g, start_reg);
-		g->ops.pramin.exit(g, mem, chunk);
+		g->ops.pramin.exit(g, mem, sgl);
 
 		size -= n;
 
-		if (n == (chunk->length - offset)) {
-			chunk = nvgpu_list_next_entry(chunk, page_alloc_chunk,
-						      list_entry);
+		if (n == (sgl_len - offset)) {
+			sgl = nvgpu_mem_sgl_next(sgl);
 			offset = 0;
 		} else {
 			offset += n;