 drivers/gpu/nvgpu/common/mm/gmmu.c          | 22
 drivers/gpu/nvgpu/common/mm/nvgpu_mem.c     |  9
 drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h |  3
 3 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 89d71b3b..0b57b88d 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -515,12 +515,20 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
 	/*
 	 * At this point we have a scatter-gather list pointing to some number
 	 * of discontiguous chunks of memory. We must iterate over that list and
-	 * generate a GMMU map call for each chunk. There are two possibilities:
-	 * either an IOMMU is enabled or not. When an IOMMU is enabled the
-	 * mapping is simple since the "physical" address is actually a virtual
-	 * IO address and will be contiguous.
+	 * generate a GMMU map call for each chunk. There are several
+	 * possibilities:
+	 *
+	 * 1. IOMMU enabled, IOMMU addressing (typical iGPU)
+	 * 2. IOMMU enabled, IOMMU bypass (NVLINK bypasses SMMU)
+	 * 3. IOMMU disabled (less common but still supported)
+	 * 4. VIDMEM
+	 *
+	 * For (1) we can assume that there's really only one actual SG chunk
+	 * since the IOMMU gives us a single contiguous address range. However,
+	 * for (2), (3) and (4) we have to actually go through each SG entry and
+	 * map each chunk individually.
 	 */
-	if (attrs->aperture == APERTURE_SYSMEM &&
+	if (nvgpu_aperture_is_sysmem(attrs->aperture) &&
 	    nvgpu_iommuable(g) &&
 	    nvgpu_sgt_iommuable(g, sgt)) {
 		u64 io_addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, attrs);
@@ -538,8 +546,8 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
 	}
 
 	/*
-	 * Finally: last possible case: do the no-IOMMU mapping. In this case we
-	 * really are mapping physical pages directly.
+	 * Handle cases (2), (3), and (4): do the no-IOMMU mapping. In this case
+	 * we really are mapping physical pages directly.
 	 */
 	nvgpu_sgt_for_each_sgl(sgl, sgt) {
 		u64 phys_addr;
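
The rewritten comment above distinguishes four mapping cases, but the diff only shows the head of each path. The following is a minimal standalone sketch (not nvgpu code) of the decision it describes: an IOMMU-addressed sysmem buffer collapses to a single contiguous IO-virtual range and needs one map call, while the bypass, no-IOMMU and VIDMEM cases are mapped one scatter-gather chunk at a time. The sg_chunk struct and the map_range()/map_buffer() helpers are hypothetical stand-ins for the driver's SGT walk and PTE programming.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct sg_chunk {
	uint64_t addr;	/* physical (or IO-virtual) base of this chunk */
	uint64_t len;	/* chunk length in bytes */
};

static void map_range(uint64_t gpu_va, uint64_t addr, uint64_t len)
{
	/* Stand-in for programming GMMU PTEs covering [gpu_va, gpu_va + len). */
	printf("map GPU VA 0x%llx -> 0x%llx (%llu bytes)\n",
	       (unsigned long long)gpu_va, (unsigned long long)addr,
	       (unsigned long long)len);
}

static void map_buffer(uint64_t gpu_va, const struct sg_chunk *sgl,
		       size_t nents, bool iommu_addressed)
{
	if (iommu_addressed) {
		/*
		 * Case (1): the IOMMU presents the whole buffer as one
		 * contiguous IO-virtual range starting at the first entry,
		 * so a single map call covers everything.
		 */
		uint64_t total = 0;

		for (size_t i = 0; i < nents; i++)
			total += sgl[i].len;
		map_range(gpu_va, sgl[0].addr, total);
		return;
	}

	/* Cases (2)-(4): walk every SG entry and map each chunk separately. */
	for (size_t i = 0; i < nents; i++) {
		map_range(gpu_va, sgl[i].addr, sgl[i].len);
		gpu_va += sgl[i].len;
	}
}

int main(void)
{
	const struct sg_chunk discontig[] = {
		{ 0x80000000ULL, 0x1000 }, { 0x80003000ULL, 0x2000 },
	};

	/* No-IOMMU / bypass / VIDMEM style: one call per chunk. */
	map_buffer(0x10000000ULL, discontig, 2, false);

	/* IOMMU-addressed style: one call for the contiguous IO range. */
	map_buffer(0x10000000ULL, discontig, 2, true);
	return 0;
}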
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 4def4938..855d455d 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -74,6 +74,15 @@ u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
 				      sysmem_mask, sysmem_coh_mask, vidmem_mask);
 }
 
+bool nvgpu_aperture_is_sysmem(enum nvgpu_aperture ap)
+{
+	return ap == __APERTURE_SYSMEM_COH || ap == APERTURE_SYSMEM;
+}
+
+bool nvgpu_mem_is_sysmem(struct nvgpu_mem *mem)
+{
+	return nvgpu_aperture_is_sysmem(mem->aperture);
+}
 
 struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt,
 				     struct nvgpu_sgl *sgl)
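
For reference, a self-contained sketch of why call sites switch to the new helper (as in the gmmu.c hunk above): a direct ap == APERTURE_SYSMEM test misses coherent sysmem, while nvgpu_aperture_is_sysmem() covers both flavors. The enum ordering below is illustrative only; the member names mirror those used in this diff.

#include <stdbool.h>
#include <stdio.h>

enum nvgpu_aperture {
	APERTURE_INVALID,
	APERTURE_SYSMEM,
	__APERTURE_SYSMEM_COH,
	APERTURE_VIDMEM,
};

static bool nvgpu_aperture_is_sysmem(enum nvgpu_aperture ap)
{
	return ap == __APERTURE_SYSMEM_COH || ap == APERTURE_SYSMEM;
}

int main(void)
{
	enum nvgpu_aperture ap = __APERTURE_SYSMEM_COH;

	/* A bare "ap == APERTURE_SYSMEM" check would report "no" here. */
	printf("sysmem? %s\n", nvgpu_aperture_is_sysmem(ap) ? "yes" : "no");
	return 0;
}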
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 302f7287..c5e3e752 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -217,6 +217,9 @@ static inline const char *nvgpu_aperture_str(struct gk20a *g,
 	return "UNKNOWN";
 }
 
+bool nvgpu_aperture_is_sysmem(enum nvgpu_aperture ap);
+bool nvgpu_mem_is_sysmem(struct nvgpu_mem *mem);
+
 /*
  * Returns true if the passed nvgpu_mem has been allocated (i.e it's valid for
  * subsequent use).