author    Alex Waterman <alexw@nvidia.com>                     2017-05-25 19:56:50 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>  2017-09-22 15:52:48 -0400
commit    0090ee5aca268a3c359f34c74b8c521df3bd8593 (patch)
tree      2779dc64554cdb38b717ce09c0e3dcbf36107ed3 /drivers/gpu/nvgpu/common/linux
parent    e32cc0108cf2ef5de7a17f0f6c0aa9af7faf23ed (diff)
gpu: nvgpu: nvgpu SGL implementation
The last major item preventing the core MM code in the nvgpu driver from being platform agnostic is the usage of Linux scatter-gather tables and scatter-gather lists. These data structures are used throughout the mapping code to handle discontiguous DMA allocations and are also overloaded to represent VIDMEM allocs.

The notion of a scatter-gather table is crucial to a HW device that can handle discontiguous DMA. The GPU has an MMU which allows the GPU to do page gathering and present a virtually contiguous buffer to the GPU HW. As a result it makes sense for the GPU driver to use some sort of scatter-gather concept to maximize memory usage efficiency.

To that end this patch keeps the notion of a scatter-gather list but implements it in the nvgpu common code. It is based heavily on the Linux SGL concept. It is a singly linked list of blocks - each representing a chunk of memory. To map or use a DMA allocation SW must iterate over each block in the SGL.

This patch implements the most basic level of support for this data structure. There are certainly easy optimizations that could be done to speed up the current implementation. However, this patch's goal is simply to divest the core MM code of any last Linux'isms. Speed and efficiency come next.

Change-Id: Icf44641db22d87fa1d003debbd9f71b605258e42
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1530867
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
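For reference, the structure the message describes is just a singly linked list of blocks, each carrying the physical address, DMA address, and length of one chunk; mapping code walks it node by node. A minimal sketch, with field names taken from the diff below (the exact struct layout and the sgl_total_length() helper are illustrative, not the header's definition):

	/*
	 * Illustrative sketch of the nvgpu SGL node described above.
	 * Field names mirror the diff below; the real definition lives
	 * in the nvgpu headers.
	 */
	struct nvgpu_mem_sgl {
		struct nvgpu_mem_sgl *next;	/* NULL terminates the list. */
		u64 phys;			/* Physical address of this chunk. */
		u64 dma;			/* DMA (IOVA) address of this chunk. */
		u64 length;			/* Length of this chunk in bytes. */
	};

	/* Hypothetical helper: total size of a discontiguous allocation. */
	static u64 sgl_total_length(struct nvgpu_mem_sgl *sgl)
	{
		u64 bytes = 0;

		while (sgl) {
			bytes += sgl->length;
			sgl = sgl->next;
		}

		return bytes;
	}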
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 114
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c        |  25
2 files changed, 132 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index e4991d0d..eb54f3fd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -21,6 +21,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/kmem.h>
 
 #include <nvgpu/linux/dma.h>
 
@@ -395,3 +396,116 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
 
 	return 0;
 }
+
+static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g,
+						 struct nvgpu_mem_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *head, *next;
+
+	head = nvgpu_kzalloc(g, sizeof(*sgl));
+	if (!head)
+		return NULL;
+
+	next = head;
+	while (true) {
+		nvgpu_log(g, gpu_dbg_sgl,
+			  "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
+			  sgl->phys, sgl->dma, sgl->length);
+
+		next->dma = sgl->dma;
+		next->phys = sgl->phys;
+		next->length = sgl->length;
+		next->next = NULL;
+
+		sgl = nvgpu_mem_sgl_next(sgl);
+		if (!sgl)
+			break;
+
+		next->next = nvgpu_kzalloc(g, sizeof(*sgl));
+		if (!next->next) {
+			nvgpu_mem_sgl_free(g, head);
+			return NULL;
+		}
+		next = next->next;
+	}
+
+	return head;
+}
+
+static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
+	struct gk20a *g,
+	struct scatterlist *linux_sgl)
+{
+	struct nvgpu_page_alloc *vidmem_alloc;
+
+	vidmem_alloc = get_vidmem_page_alloc(linux_sgl);
+	if (!vidmem_alloc)
+		return NULL;
+
+	nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:");
+
+	return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
+}
+
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
+					   struct sg_table *sgt)
+{
+	struct nvgpu_mem_sgl *head, *sgl, *next;
+	struct scatterlist *linux_sgl = sgt->sgl;
+
+	if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
+		return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl);
+
+	head = nvgpu_kzalloc(g, sizeof(*sgl));
+	if (!head)
+		return NULL;
+
+	nvgpu_log(g, gpu_dbg_sgl, "Making sgl:");
+
+	sgl = head;
+	while (true) {
+		sgl->dma = sg_dma_address(linux_sgl);
+		sgl->phys = sg_phys(linux_sgl);
+		sgl->length = linux_sgl->length;
+
+		/*
+		 * We don't allow offsets into the pages here; an offset
+		 * would cause problems for the mapping code.
+		 */
+		if (WARN_ON(linux_sgl->offset)) {
+			nvgpu_mem_sgl_free(g, head);
+			return NULL;
+		}
+
+		nvgpu_log(g, gpu_dbg_sgl,
+			  "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
+			  sgl->phys, sgl->dma, sgl->length);
+
+		/*
+		 * When there are no more SGL entries in the Linux SGL we
+		 * are done. Don't bother making any more SGL entries for
+		 * the nvgpu SGL.
+		 */
+		linux_sgl = sg_next(linux_sgl);
+		if (!linux_sgl)
+			break;
+
+		next = nvgpu_kzalloc(g, sizeof(*sgl));
+		if (!next) {
+			nvgpu_mem_sgl_free(g, head);
+			return NULL;
+		}
+
+		sgl->next = next;
+		sgl = next;
+	}
+
+	nvgpu_log(g, gpu_dbg_sgl, "Done!");
+	return head;
+}
+
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
+						    struct nvgpu_mem *mem)
+{
+	return nvgpu_mem_sgl_create(g, mem->priv.sgt);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 86d8bec9..4a4429dc 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -21,8 +21,11 @@
 #include <nvgpu/lock.h>
 #include <nvgpu/rbtree.h>
 #include <nvgpu/vm_area.h>
+#include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/page_allocator.h>
 
+#include <nvgpu/linux/nvgpu_mem.h>
+
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 #include "gk20a/kind_gk20a.h"
@@ -66,17 +69,19 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
 
 	if (aperture == APERTURE_VIDMEM) {
 		struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
-		struct page_alloc_chunk *chunk = NULL;
+		struct nvgpu_mem_sgl *sgl_vid = alloc->sgl;
 
-		nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-					  page_alloc_chunk, list_entry) {
-			chunk_align = 1ULL << __ffs(chunk->base |
-						    chunk->length);
+		while (sgl_vid) {
+			chunk_align = 1ULL <<
+				__ffs(nvgpu_mem_sgl_phys(sgl_vid) |
+				      nvgpu_mem_sgl_length(sgl_vid));
 
 			if (align)
 				align = min(align, chunk_align);
 			else
 				align = chunk_align;
+
+			sgl_vid = nvgpu_mem_sgl_next(sgl_vid);
 		}
 
 		return align;
@@ -237,6 +242,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	struct nvgpu_vm_area *vm_area = NULL;
 	u32 ctag_offset;
 	enum nvgpu_aperture aperture;
+	struct nvgpu_mem_sgl *nvgpu_sgl;
 
 	/*
 	 * The kind used as part of the key for map caching. HW may
@@ -393,9 +399,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 		ctag_offset += buffer_offset >>
 			       ilog2(g->ops.fb.compression_page_size(g));
 
+	nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt);
+
 	/* update gmmu ptes */
-	map_offset = g->ops.mm.gmmu_map(vm, map_offset,
-					bfr.sgt,
+	map_offset = g->ops.mm.gmmu_map(vm,
+					map_offset,
+					nvgpu_sgl,
 					buffer_offset, /* sg offset */
 					mapping_size,
 					bfr.pgsz_idx,
@@ -410,6 +419,8 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	if (!map_offset)
 		goto clean_up;
 
+	nvgpu_mem_sgl_free(g, nvgpu_sgl);
+
 	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
 	if (!mapped_buffer) {
 		nvgpu_warn(g, "oom allocating tracking buffer");
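Taken together, the vm.c hunks show the intended lifecycle of the new structure: nvgpu_mem_sgl_create() translates the Linux sg_table just before mapping, g->ops.mm.gmmu_map() consumes the nvgpu SGL in place of the sg_table, and nvgpu_mem_sgl_free() releases it once the PTEs have been written. The nvgpu SGL is thus a transient translation layer between the Linux representation and the common MM code, and gmmu_map() implementations should not retain the pointer past the call.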