From 0090ee5aca268a3c359f34c74b8c521df3bd8593 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Thu, 25 May 2017 16:56:50 -0700 Subject: gpu: nvgpu: nvgpu SGL implementation The last major item preventing the core MM code in the nvgpu driver from being platform agnostic is the usage of Linux scattergather tables and scattergather lists. These data structures are used throughout the mapping code to handle discontiguous DMA allocations and also overloaded to represent VIDMEM allocs. The notion of a scatter gather table is crucial to a HW device that can handle discontiguous DMA. The GPU has an MMU which allows the GPU to do page gathering and present a virtually contiguous buffer to the GPU HW. As a result it makes sense for the GPU driver to use some sort of scatter gather concept to maximize memory usage efficiency. To that end this patch keeps the notion of a scatter gather list but implements it in the nvgpu common code. It is based heavily on the Linux SGL concept. It is a singly linked list of blocks - each representing a chunk of memory. To map or use a DMA allocation SW must iterate over each block in the SGL. This patch implements the most basic level of support for this data structure. There are certainly easy optimizations that could be done to speed up the current implementation. However, this patch's goal is to simply divest the core MM code from any last Linux'isms. Speed and efficiency come next. 
Change-Id: Icf44641db22d87fa1d003debbd9f71b605258e42 Signed-off-by: Alex Waterman Reviewed-on: https://git-master.nvidia.com/r/1530867 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/include/nvgpu/gmmu.h | 2 - drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h | 2 + drivers/gpu/nvgpu/include/nvgpu/log.h | 1 + drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | 45 +++++++++++++++++++++++ drivers/gpu/nvgpu/include/nvgpu/page_allocator.h | 22 +++-------- 5 files changed, 54 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/nvgpu/include') diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index de129a5f..11060300 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h @@ -27,8 +27,6 @@ #include #endif -struct scatterlist; - /* * This is the GMMU API visible to blocks outside of the GMMU. Basically this * API supports all the different types of mappings that might be done in the diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h index e2d4d336..f96c2801 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h @@ -32,6 +32,8 @@ struct nvgpu_mem_priv { }; u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl); +struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g, + struct sg_table *sgt); /** * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages. diff --git a/drivers/gpu/nvgpu/include/nvgpu/log.h b/drivers/gpu/nvgpu/include/nvgpu/log.h index 4cac3e70..cfce8c5b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/log.h +++ b/drivers/gpu/nvgpu/include/nvgpu/log.h @@ -71,6 +71,7 @@ enum nvgpu_log_categories { gpu_dbg_pd_cache = BIT(20), /* PD cache traces. */ gpu_dbg_alloc = BIT(21), /* Allocator debugging. */ gpu_dbg_dma = BIT(22), /* DMA allocation prints. 
*/ + gpu_dbg_sgl = BIT(23), /* SGL related traces. */ gpu_dbg_mem = BIT(31), /* memory accesses; very verbose. */ }; diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index a112623e..7d19cf81 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h @@ -33,6 +33,8 @@ struct gk20a; struct nvgpu_allocator; struct nvgpu_gmmu_attrs; +#define NVGPU_MEM_DMA_ERROR (~0ULL) + /* * Real location of a buffer - nvgpu_aperture_mask() will deduce what will be * told to the gpu about the aperture, but this flag designates where the @@ -44,6 +46,28 @@ enum nvgpu_aperture { APERTURE_VIDMEM }; +/* + * This struct holds the necessary information for describing a struct + * nvgpu_mem's scatter gather list. + * + * These are created in a platform dependent way. As a result the function + * definition for allocating these lives in the file. + */ +struct nvgpu_mem_sgl { + /* + * Internally this is implemented as a singly linked list. + */ + struct nvgpu_mem_sgl *next; + + /* + * There is both a phys address and a DMA address since some systems, + * for example ones with an IOMMU, may see these as different addresses. + */ + u64 phys; + u64 dma; + u64 length; +}; + struct nvgpu_mem { /* * Populated for all nvgpu_mem structs - vidmem or system. @@ -176,6 +200,27 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, struct nvgpu_mem *dest, struct nvgpu_mem *src, int start_page, int nr_pages); +/** + * nvgpu_mem_sgl_create_from_mem - Create a scatter list from an nvgpu_mem. + * + * @g - The GPU. + * @mem - The source memory allocation to use. + * + * Create a scatter gather list from the passed @mem struct. This list lets the + * calling code iterate across each chunk of a DMA allocation for when that DMA + * allocation is not completely contiguous. 
+ */ +struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g, + struct nvgpu_mem *mem); +void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl); + +struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl); +u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl); +u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl); +u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl); +u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl, + struct nvgpu_gmmu_attrs *attrs); + /* * Buffer accessors - wrap between begin() and end() if there is no permanent * kernel mapping for this buffer. diff --git a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h index 9a5ef8d3..de83ca7f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h +++ b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h @@ -18,6 +18,7 @@ #define PAGE_ALLOCATOR_PRIV_H #include +#include #include #include #include @@ -83,27 +84,17 @@ page_alloc_slab_page_from_list_entry(struct nvgpu_list_node *node) ((uintptr_t)node - offsetof(struct page_alloc_slab_page, list_entry)); }; -struct page_alloc_chunk { - struct nvgpu_list_node list_entry; - - u64 base; - u64 length; -}; - -static inline struct page_alloc_chunk * -page_alloc_chunk_from_list_entry(struct nvgpu_list_node *node) -{ - return (struct page_alloc_chunk *) - ((uintptr_t)node - offsetof(struct page_alloc_chunk, list_entry)); -}; - /* * Struct to handle internal management of page allocation. It holds a list * of the chunks of pages that make up the overall allocation - much like a * scatter gather table. */ struct nvgpu_page_alloc { - struct nvgpu_list_node alloc_chunks; + /* + * nvgpu_mem_sgl for describing the actual allocation. Convenient for + * GMMU mapping. 
+ */ + struct nvgpu_mem_sgl *sgl; int nr_chunks; u64 length; @@ -156,7 +147,6 @@ struct nvgpu_page_allocator { int nr_slabs; struct nvgpu_kmem_cache *alloc_cache; - struct nvgpu_kmem_cache *chunk_cache; struct nvgpu_kmem_cache *slab_page_cache; u64 flags; -- cgit v1.2.2