diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/nvgpu_mem.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 52 |
1 file changed, 50 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index 9f677058..b4e718b4 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | |||
@@ -47,13 +47,20 @@ u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl) | |||
47 | return sgt->ops->sgl_length(sgl); | 47 | return sgt->ops->sgl_length(sgl); |
48 | } | 48 | } |
49 | 49 | ||
50 | u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl, | 50 | u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl, |
51 | struct nvgpu_gmmu_attrs *attrs) | 51 | struct nvgpu_gmmu_attrs *attrs) |
52 | { | 52 | { |
53 | return sgt->ops->sgl_gpu_addr(g, sgl, attrs); | 53 | return sgt->ops->sgl_gpu_addr(g, sgl, attrs); |
54 | } | 54 | } |
55 | 55 | ||
56 | void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g) | 56 | bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt) |
57 | { | ||
58 | if (sgt->ops->sgt_iommuable) | ||
59 | return sgt->ops->sgt_iommuable(g, sgt); | ||
60 | return false; | ||
61 | } | ||
62 | |||
63 | void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt) | ||
57 | { | 64 | { |
58 | if (sgt && sgt->ops->sgt_free) | 65 | if (sgt && sgt->ops->sgt_free) |
59 | sgt->ops->sgt_free(g, sgt); | 66 | sgt->ops->sgt_free(g, sgt); |
@@ -69,3 +76,44 @@ u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys) | |||
69 | 76 | ||
70 | return phys; | 77 | return phys; |
71 | } | 78 | } |
79 | |||
80 | /* | ||
81 | * Determine alignment for a passed buffer. Necessary since the buffer may | ||
82 | * appear big enough to map with large pages but the SGL may have chunks that | ||
83 | * are not aligned on a 64/128kB large page boundary. There's also the | ||
84 | * possibility chunks are odd sizes which will necessitate small page mappings | ||
85 | * to correctly glue them together into a contiguous virtual mapping. | ||
86 | */ | ||
87 | u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt) | ||
88 | { | ||
89 | u64 align = 0, chunk_align = 0; | ||
90 | void *sgl; | ||
91 | |||
92 | /* | ||
93 | * If this SGT is iommuable and we want to use the IOMMU address then | ||
94 | * the SGT's first entry has the IOMMU address. We will align on this | ||
95 | * and double check length of buffer later. Also, since there's an | ||
96 | * IOMMU we know that this DMA address is contiguous. | ||
97 | */ | ||
98 | if (!g->mm.bypass_smmu && | ||
99 | nvgpu_sgt_iommuable(g, sgt) && | ||
100 | nvgpu_sgt_get_dma(sgt, sgt->sgl)) | ||
101 | return 1ULL << __ffs(nvgpu_sgt_get_dma(sgt, sgt->sgl)); | ||
102 | |||
103 | /* | ||
104 | * Otherwise the buffer is not iommuable (VIDMEM, for example) or we are | ||
105 | * bypassing the IOMMU and need to use the underlying physical entries | ||
106 | * of the SGT. | ||
107 | */ | ||
108 | nvgpu_sgt_for_each_sgl(sgl, sgt) { | ||
109 | chunk_align = 1ULL << __ffs(nvgpu_sgt_get_phys(sgt, sgl) | | ||
110 | nvgpu_sgt_get_length(sgt, sgl)); | ||
111 | |||
112 | if (align) | ||
113 | align = min(align, chunk_align); | ||
114 | else | ||
115 | align = chunk_align; | ||
116 | } | ||
117 | |||
118 | return align; | ||
119 | } | ||