gpu: nvgpu: nvgpu SGL implementation

The last major item preventing the core MM code in the nvgpu driver from being platform agnostic is the usage of Linux scatter-gather tables and scatter-gather lists. These data structures are used throughout the mapping code to handle discontiguous DMA allocations and are also overloaded to represent VIDMEM allocs. The notion of a scatter-gather table is crucial to a HW device that can handle discontiguous DMA. The GPU has an MMU which allows the GPU to do page gathering and present a virtually contiguous buffer to the GPU HW. As a result it makes sense for the GPU driver to use some sort of scatter-gather concept to maximize memory usage efficiency. To that end this patch keeps the notion of a scatter-gather list but implements it in the nvgpu common code. It is based heavily on the Linux SGL concept. It is a singly linked list of blocks - each representing a chunk of memory. To map or use a DMA allocation SW must iterate over each block in the SGL. This patch implements the most basic level of support for this data structure. There are certainly easy optimizations that could be done to speed up the current implementation. However, this patch's goal is simply to divest the core MM code of any last Linux-isms. Speed and efficiency come next. Change-Id: Icf44641db22d87fa1d003debbd9f71b605258e42 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1530867 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Alex Waterman <alexw@nvidia.com> 2017-05-25 19:56:50 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-09-22 15:52:48 -0400
commit: 0090ee5aca268a3c359f34c74b8c521df3bd8593 (patch)
tree: 2779dc64554cdb38b717ce09c0e3dcbf36107ed3 /drivers/gpu/nvgpu/common/linux
parent: e32cc0108cf2ef5de7a17f0f6c0aa9af7faf23ed (diff)
2 files changed, 132 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index e4991d0d..eb54f3fd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -21,6 +21,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/kmem.h>
 #include <nvgpu/linux/dma.h>
@@ -395,3 +396,116 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
        return 0;
 }
+static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g,
+                                                 struct nvgpu_mem_sgl *sgl)
+{
+        struct nvgpu_mem_sgl *head, *next;
+        head = nvgpu_kzalloc(g, sizeof(*sgl));
+        if (!head)
+                return NULL;
+        next = head;
+        while (true) {
+                nvgpu_log(g, gpu_dbg_sgl,
+                          "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
+                          sgl->phys, sgl->dma, sgl->length);
+                next->dma    = sgl->dma;
+                next->phys   = sgl->phys;
+                next->length = sgl->length;
+                next->next   = NULL;
+                sgl = nvgpu_mem_sgl_next(sgl);
+                if (!sgl)
+                        break;
+                next->next = nvgpu_kzalloc(g, sizeof(*sgl));
+                if (!next->next) {
+                        nvgpu_mem_sgl_free(g, head);
+                        return NULL;
+                }
+                next = next->next;
+        }
+        return head;
+}
+static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
+        struct gk20a *g,
+        struct scatterlist *linux_sgl)
+{
+        struct nvgpu_page_alloc *vidmem_alloc;
+        vidmem_alloc = get_vidmem_page_alloc(linux_sgl);
+        if (!vidmem_alloc)
+                return NULL;
+        nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:");
+        return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
+}
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
+                                           struct sg_table *sgt)
+{
+        struct nvgpu_mem_sgl *head, *sgl, *next;
+        struct scatterlist *linux_sgl = sgt->sgl;
+        if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
+                return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl);
+        head = nvgpu_kzalloc(g, sizeof(*sgl));
+        if (!head)
+                return NULL;
+        nvgpu_log(g, gpu_dbg_sgl, "Making sgl:");
+        sgl = head;
+        while (true) {
+                sgl->dma    = sg_dma_address(linux_sgl);
+                sgl->phys   = sg_phys(linux_sgl);
+                sgl->length = linux_sgl->length;
+                /*
+                 * We don't like offsets in the pages here. This will cause
+                 * problems.
+                 */
+                if (WARN_ON(linux_sgl->offset)) {
+                        nvgpu_mem_sgl_free(g, head);
+                        return NULL;
+                }
+                nvgpu_log(g, gpu_dbg_sgl,
+                          "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
+                          sgl->phys, sgl->dma, sgl->length);
+                /*
+                 * When there's no more SGL ents for the Linux SGL we are
+                 * done. Don't bother making any more SGL ents for the nvgpu
+                 * SGL.
+                 */
+                linux_sgl = sg_next(linux_sgl);
+                if (!linux_sgl)
+                        break;
+                next = nvgpu_kzalloc(g, sizeof(*sgl));
+                if (!next) {
+                        nvgpu_mem_sgl_free(g, head);
+                        return NULL;
+                }
+                sgl->next = next;
+                sgl = next;
+        }
+        nvgpu_log(g, gpu_dbg_sgl, "Done!");
+        return head;
+}
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
+                                                    struct nvgpu_mem *mem)
+{
+        return nvgpu_mem_sgl_create(g, mem->priv.sgt);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 86d8bec9..4a4429dc 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -21,8 +21,11 @@
 #include <nvgpu/lock.h>
 #include <nvgpu/rbtree.h>
 #include <nvgpu/vm_area.h>
+#include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/page_allocator.h>
+#include <nvgpu/linux/nvgpu_mem.h>
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 #include "gk20a/kind_gk20a.h"
@@ -66,17 +69,19 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
        if (aperture == APERTURE_VIDMEM) {
                struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
-                struct page_alloc_chunk *chunk = NULL;
+                struct nvgpu_mem_sgl *sgl_vid = alloc->sgl;
-                nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
+                while (sgl_vid) {
-                                        page_alloc_chunk, list_entry) {
+                        chunk_align = 1ULL <<
-                        chunk_align = 1ULL << __ffs(chunk->base |
+                                __ffs(nvgpu_mem_sgl_phys(sgl_vid) |
-                                                    chunk->length);
+                                nvgpu_mem_sgl_length(sgl_vid));
                        if (align)
                                align = min(align, chunk_align);
                        else
                                align = chunk_align;
+                        sgl_vid = nvgpu_mem_sgl_next(sgl_vid);
                }
                return align;
@@ -237,6 +242,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
        struct nvgpu_vm_area *vm_area = NULL;
        u32 ctag_offset;
        enum nvgpu_aperture aperture;
+        struct nvgpu_mem_sgl *nvgpu_sgl;
        /*
         * The kind used as part of the key for map caching. HW may
@@ -393,9 +399,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
                ctag_offset += buffer_offset >>
                               ilog2(g->ops.fb.compression_page_size(g));
+        nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt);
        /* update gmmu ptes */
-        map_offset = g->ops.mm.gmmu_map(vm, map_offset,
+        map_offset = g->ops.mm.gmmu_map(vm,
-                                        bfr.sgt,
+                                        map_offset,
+                                        nvgpu_sgl,
                                        buffer_offset, /* sg offset */
                                        mapping_size,
                                        bfr.pgsz_idx,
@@ -410,6 +419,8 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
        if (!map_offset)
                goto clean_up;
+        nvgpu_mem_sgl_free(g, nvgpu_sgl);
        mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
        if (!mapped_buffer) {
                nvgpu_warn(g, "oom allocating tracking buffer");
author	Alex Waterman <alexw@nvidia.com>	2017-05-25 19:56:50 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-09-22 15:52:48 -0400
commit	0090ee5aca268a3c359f34c74b8c521df3bd8593 (patch)
tree	2779dc64554cdb38b717ce09c0e3dcbf36107ed3 /drivers/gpu/nvgpu/common/linux
parent	e32cc0108cf2ef5de7a17f0f6c0aa9af7faf23ed (diff)

diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index e4991d0d..eb54f3fd 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -21,6 +21,7 @@
21	#include <nvgpu/log.h>	21	#include <nvgpu/log.h>
22	#include <nvgpu/bug.h>	22	#include <nvgpu/bug.h>
23	#include <nvgpu/enabled.h>	23	#include <nvgpu/enabled.h>
		24	#include <nvgpu/kmem.h>
24		25
25	#include <nvgpu/linux/dma.h>	26	#include <nvgpu/linux/dma.h>
26		27
@@ -395,3 +396,116 @@ int __nvgpu_mem_create_from_pages(struct gk20a g, struct nvgpu_mem dest,
395		396
396	return 0;	397	return 0;
397	}	398	}
		399
		400	static struct nvgpu_mem_sgl __nvgpu_mem_sgl_dup(struct gk20a g,
		401	struct nvgpu_mem_sgl *sgl)
		402	{
		403	struct nvgpu_mem_sgl head, next;
		404
		405	head = nvgpu_kzalloc(g, sizeof(*sgl));
		406	if (!head)
		407	return NULL;
		408
		409	next = head;
		410	while (true) {
		411	nvgpu_log(g, gpu_dbg_sgl,
		412	" phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
		413	sgl->phys, sgl->dma, sgl->length);
		414
		415	next->dma = sgl->dma;
		416	next->phys = sgl->phys;
		417	next->length = sgl->length;
		418	next->next = NULL;
		419
		420	sgl = nvgpu_mem_sgl_next(sgl);
		421	if (!sgl)
		422	break;
		423
		424	next->next = nvgpu_kzalloc(g, sizeof(*sgl));
		425	if (!next->next) {
		426	nvgpu_mem_sgl_free(g, head);
		427	return NULL;
		428	}
		429	next = next->next;
		430	}
		431
		432	return head;
		433	}
		434
		435	static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
		436	struct gk20a *g,
		437	struct scatterlist *linux_sgl)
		438	{
		439	struct nvgpu_page_alloc *vidmem_alloc;
		440
		441	vidmem_alloc = get_vidmem_page_alloc(linux_sgl);
		442	if (!vidmem_alloc)
		443	return NULL;
		444
		445	nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:");
		446
		447	return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
		448	}
		449
		450	struct nvgpu_mem_sgl nvgpu_mem_sgl_create(struct gk20a g,
		451	struct sg_table *sgt)
		452	{
		453	struct nvgpu_mem_sgl head, sgl, *next;
		454	struct scatterlist *linux_sgl = sgt->sgl;
		455
		456	if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
		457	return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl);
		458
		459	head = nvgpu_kzalloc(g, sizeof(*sgl));
		460	if (!head)
		461	return NULL;
		462
		463	nvgpu_log(g, gpu_dbg_sgl, "Making sgl:");
		464
		465	sgl = head;
		466	while (true) {
		467	sgl->dma = sg_dma_address(linux_sgl);
		468	sgl->phys = sg_phys(linux_sgl);
		469	sgl->length = linux_sgl->length;
		470
		471	/*
		472	* We don't like offsets in the pages here. This will cause
		473	* problems.
		474	*/
		475	if (WARN_ON(linux_sgl->offset)) {
		476	nvgpu_mem_sgl_free(g, head);
		477	return NULL;
		478	}
		479
		480	nvgpu_log(g, gpu_dbg_sgl,
		481	" phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
		482	sgl->phys, sgl->dma, sgl->length);
		483
		484	/*
		485	* When there's no more SGL ents for the Linux SGL we are
		486	* done. Don't bother making any more SGL ents for the nvgpu
		487	* SGL.
		488	*/
		489	linux_sgl = sg_next(linux_sgl);
		490	if (!linux_sgl)
		491	break;
		492
		493	next = nvgpu_kzalloc(g, sizeof(*sgl));
		494	if (!next) {
		495	nvgpu_mem_sgl_free(g, head);
		496	return NULL;
		497	}
		498
		499	sgl->next = next;
		500	sgl = next;
		501	}
		502
		503	nvgpu_log(g, gpu_dbg_sgl, "Done!");
		504	return head;
		505	}
		506
		507	struct nvgpu_mem_sgl nvgpu_mem_sgl_create_from_mem(struct gk20a g,
		508	struct nvgpu_mem *mem)
		509	{
		510	return nvgpu_mem_sgl_create(g, mem->priv.sgt);
		511	}


diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index 86d8bec9..4a4429dc 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -21,8 +21,11 @@
21	#include <nvgpu/lock.h>	21	#include <nvgpu/lock.h>
22	#include <nvgpu/rbtree.h>	22	#include <nvgpu/rbtree.h>
23	#include <nvgpu/vm_area.h>	23	#include <nvgpu/vm_area.h>
		24	#include <nvgpu/nvgpu_mem.h>
24	#include <nvgpu/page_allocator.h>	25	#include <nvgpu/page_allocator.h>
25		26
		27	#include <nvgpu/linux/nvgpu_mem.h>
		28
26	#include "gk20a/gk20a.h"	29	#include "gk20a/gk20a.h"
27	#include "gk20a/mm_gk20a.h"	30	#include "gk20a/mm_gk20a.h"
28	#include "gk20a/kind_gk20a.h"	31	#include "gk20a/kind_gk20a.h"
@@ -66,17 +69,19 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a g, struct scatterlist sgl,
66		69
67	if (aperture == APERTURE_VIDMEM) {	70	if (aperture == APERTURE_VIDMEM) {
68	struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);	71	struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
69	struct page_alloc_chunk *chunk = NULL;	72	struct nvgpu_mem_sgl *sgl_vid = alloc->sgl;
70		73
71	nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,	74	while (sgl_vid) {
72	page_alloc_chunk, list_entry) {	75	chunk_align = 1ULL <<
73	chunk_align = 1ULL << __ffs(chunk->base \|	76	__ffs(nvgpu_mem_sgl_phys(sgl_vid) \|
74	chunk->length);	77	nvgpu_mem_sgl_length(sgl_vid));
75		78
76	if (align)	79	if (align)
77	align = min(align, chunk_align);	80	align = min(align, chunk_align);
78	else	81	else
79	align = chunk_align;	82	align = chunk_align;
		83
		84	sgl_vid = nvgpu_mem_sgl_next(sgl_vid);
80	}	85	}
81		86
82	return align;	87	return align;
@@ -237,6 +242,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
237	struct nvgpu_vm_area *vm_area = NULL;	242	struct nvgpu_vm_area *vm_area = NULL;
238	u32 ctag_offset;	243	u32 ctag_offset;
239	enum nvgpu_aperture aperture;	244	enum nvgpu_aperture aperture;
		245	struct nvgpu_mem_sgl *nvgpu_sgl;
240		246
241	/*	247	/*
242	* The kind used as part of the key for map caching. HW may	248	* The kind used as part of the key for map caching. HW may
@@ -393,9 +399,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
393	ctag_offset += buffer_offset >>	399	ctag_offset += buffer_offset >>
394	ilog2(g->ops.fb.compression_page_size(g));	400	ilog2(g->ops.fb.compression_page_size(g));
395		401
		402	nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt);
		403
396	/* update gmmu ptes */	404	/* update gmmu ptes */
397	map_offset = g->ops.mm.gmmu_map(vm, map_offset,	405	map_offset = g->ops.mm.gmmu_map(vm,
398	bfr.sgt,	406	map_offset,
		407	nvgpu_sgl,
399	buffer_offset, /* sg offset */	408	buffer_offset, /* sg offset */
400	mapping_size,	409	mapping_size,
401	bfr.pgsz_idx,	410	bfr.pgsz_idx,
@@ -410,6 +419,8 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
410	if (!map_offset)	419	if (!map_offset)
411	goto clean_up;	420	goto clean_up;
412		421
		422	nvgpu_mem_sgl_free(g, nvgpu_sgl);
		423
413	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));	424	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
414	if (!mapped_buffer) {	425	if (!mapped_buffer) {
415	nvgpu_warn(g, "oom allocating tracking buffer");	426	nvgpu_warn(g, "oom allocating tracking buffer");