diff options
author | Sunny He <suhe@nvidia.com> | 2017-08-15 15:01:04 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-09-22 15:55:24 -0400 |
commit | 17c581d75514c32d1e8c1e416beb33b3ccce22a5 (patch) | |
tree | a25d063f19b8e1f83f61af418f3aa2ac32fe0cce /drivers/gpu/nvgpu/common/linux | |
parent | 0090ee5aca268a3c359f34c74b8c521df3bd8593 (diff) |
gpu: nvgpu: SGL passthrough implementation
The basic nvgpu_mem_sgl implementation provides support
for OS-specific scatter-gather list implementations by
simply copying them node by node. This is inefficient,
taking extra time and memory.
This patch implements an nvgpu_sgt struct to act as
a header which is inserted at the front of any scatter-
gather list implementation. This labels every struct
with a set of ops which can be used to interact with
the attached scatter-gather list.
Since nvgpu common code only has to interact with these
function pointers, any sgl implementation can be used.
Initialization only requires the allocation of a single
struct, removing the need to copy or iterate through the
sgl being converted.
Jira NVGPU-186
Change-Id: I2994f804a4a4cc141b702e987e9081d8560ba2e8
Signed-off-by: Sunny He <suhe@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1541426
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 144 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vm.c | 17 |
2 files changed, 70 insertions, 91 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index eb54f3fd..8d8909dd 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | |||
@@ -397,42 +397,59 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, | |||
397 | return 0; | 397 | return 0; |
398 | } | 398 | } |
399 | 399 | ||
400 | static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g, | 400 | static void *nvgpu_mem_linux_sgl_next(void *sgl) |
401 | struct nvgpu_mem_sgl *sgl) | ||
402 | { | 401 | { |
403 | struct nvgpu_mem_sgl *head, *next; | 402 | return sg_next((struct scatterlist *)sgl); |
403 | } | ||
404 | 404 | ||
405 | head = nvgpu_kzalloc(g, sizeof(*sgl)); | 405 | static u64 nvgpu_mem_linux_sgl_phys(void *sgl) |
406 | if (!head) | 406 | { |
407 | return NULL; | 407 | return (u64)sg_phys((struct scatterlist *)sgl); |
408 | } | ||
408 | 409 | ||
409 | next = head; | 410 | static u64 nvgpu_mem_linux_sgl_dma(void *sgl) |
410 | while (true) { | 411 | { |
411 | nvgpu_log(g, gpu_dbg_sgl, | 412 | return (u64)sg_dma_address((struct scatterlist *)sgl); |
412 | " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx", | 413 | } |
413 | sgl->phys, sgl->dma, sgl->length); | ||
414 | |||
415 | next->dma = sgl->dma; | ||
416 | next->phys = sgl->phys; | ||
417 | next->length = sgl->length; | ||
418 | next->next = NULL; | ||
419 | |||
420 | sgl = nvgpu_mem_sgl_next(sgl); | ||
421 | if (!sgl) | ||
422 | break; | ||
423 | |||
424 | next->next = nvgpu_kzalloc(g, sizeof(*sgl)); | ||
425 | if (!next->next) { | ||
426 | nvgpu_mem_sgl_free(g, head); | ||
427 | return NULL; | ||
428 | } | ||
429 | next = next->next; | ||
430 | } | ||
431 | 414 | ||
432 | return head; | 415 | static u64 nvgpu_mem_linux_sgl_length(void *sgl) |
416 | { | ||
417 | return (u64)((struct scatterlist *)sgl)->length; | ||
433 | } | 418 | } |
434 | 419 | ||
435 | static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem( | 420 | static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl, |
421 | struct nvgpu_gmmu_attrs *attrs) | ||
422 | { | ||
423 | if (sg_dma_address((struct scatterlist *)sgl) == 0) | ||
424 | return g->ops.mm.gpu_phys_addr(g, attrs, | ||
425 | sg_phys((struct scatterlist *)sgl)); | ||
426 | |||
427 | if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE) | ||
428 | return 0; | ||
429 | |||
430 | return gk20a_mm_smmu_vaddr_translate(g, | ||
431 | sg_dma_address((struct scatterlist *)sgl)); | ||
432 | } | ||
433 | |||
434 | static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt) | ||
435 | { | ||
436 | /* | ||
437 | * Free this SGT. All we do is free the passed SGT. The actual Linux | ||
438 | * SGT/SGL needs to be freed separately. | ||
439 | */ | ||
440 | nvgpu_kfree(g, sgt); | ||
441 | } | ||
442 | |||
443 | static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = { | ||
444 | .sgl_next = nvgpu_mem_linux_sgl_next, | ||
445 | .sgl_phys = nvgpu_mem_linux_sgl_phys, | ||
446 | .sgl_dma = nvgpu_mem_linux_sgl_dma, | ||
447 | .sgl_length = nvgpu_mem_linux_sgl_length, | ||
448 | .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr, | ||
449 | .sgt_free = nvgpu_mem_linux_sgl_free, | ||
450 | }; | ||
451 | |||
452 | static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem( | ||
436 | struct gk20a *g, | 453 | struct gk20a *g, |
437 | struct scatterlist *linux_sgl) | 454 | struct scatterlist *linux_sgl) |
438 | { | 455 | { |
@@ -442,70 +459,31 @@ static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem( | |||
442 | if (!vidmem_alloc) | 459 | if (!vidmem_alloc) |
443 | return NULL; | 460 | return NULL; |
444 | 461 | ||
445 | nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:"); | 462 | return &vidmem_alloc->sgt; |
446 | |||
447 | return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl); | ||
448 | } | 463 | } |
449 | 464 | ||
450 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g, | 465 | struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt) |
451 | struct sg_table *sgt) | ||
452 | { | 466 | { |
453 | struct nvgpu_mem_sgl *head, *sgl, *next; | 467 | struct nvgpu_sgt *nvgpu_sgt; |
454 | struct scatterlist *linux_sgl = sgt->sgl; | 468 | struct scatterlist *linux_sgl = sgt->sgl; |
455 | 469 | ||
456 | if (is_vidmem_page_alloc(sg_dma_address(linux_sgl))) | 470 | if (is_vidmem_page_alloc(sg_dma_address(linux_sgl))) |
457 | return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl); | 471 | return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl); |
458 | 472 | ||
459 | head = nvgpu_kzalloc(g, sizeof(*sgl)); | 473 | nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt)); |
460 | if (!head) | 474 | if (!nvgpu_sgt) |
461 | return NULL; | 475 | return NULL; |
462 | 476 | ||
463 | nvgpu_log(g, gpu_dbg_sgl, "Making sgl:"); | 477 | nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!"); |
464 | 478 | ||
465 | sgl = head; | 479 | nvgpu_sgt->sgl = sgt->sgl; |
466 | while (true) { | 480 | nvgpu_sgt->ops = &nvgpu_linux_sgt_ops; |
467 | sgl->dma = sg_dma_address(linux_sgl); | ||
468 | sgl->phys = sg_phys(linux_sgl); | ||
469 | sgl->length = linux_sgl->length; | ||
470 | |||
471 | /* | ||
472 | * We don't like offsets in the pages here. This will cause | ||
473 | * problems. | ||
474 | */ | ||
475 | if (WARN_ON(linux_sgl->offset)) { | ||
476 | nvgpu_mem_sgl_free(g, head); | ||
477 | return NULL; | ||
478 | } | ||
479 | |||
480 | nvgpu_log(g, gpu_dbg_sgl, | ||
481 | " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx", | ||
482 | sgl->phys, sgl->dma, sgl->length); | ||
483 | |||
484 | /* | ||
485 | * When there's no more SGL ents for the Linux SGL we are | ||
486 | * done. Don't bother making any more SGL ents for the nvgpu | ||
487 | * SGL. | ||
488 | */ | ||
489 | linux_sgl = sg_next(linux_sgl); | ||
490 | if (!linux_sgl) | ||
491 | break; | ||
492 | |||
493 | next = nvgpu_kzalloc(g, sizeof(*sgl)); | ||
494 | if (!next) { | ||
495 | nvgpu_mem_sgl_free(g, head); | ||
496 | return NULL; | ||
497 | } | ||
498 | |||
499 | sgl->next = next; | ||
500 | sgl = next; | ||
501 | } | ||
502 | 481 | ||
503 | nvgpu_log(g, gpu_dbg_sgl, "Done!"); | 482 | return nvgpu_sgt; |
504 | return head; | ||
505 | } | 483 | } |
506 | 484 | ||
507 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g, | 485 | struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g, |
508 | struct nvgpu_mem *mem) | 486 | struct nvgpu_mem *mem) |
509 | { | 487 | { |
510 | return nvgpu_mem_sgl_create(g, mem->priv.sgt); | 488 | return nvgpu_linux_sgt_create(g, mem->priv.sgt); |
511 | } | 489 | } |
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index 4a4429dc..2e29f0f7 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c | |||
@@ -69,19 +69,20 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl, | |||
69 | 69 | ||
70 | if (aperture == APERTURE_VIDMEM) { | 70 | if (aperture == APERTURE_VIDMEM) { |
71 | struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); | 71 | struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); |
72 | struct nvgpu_mem_sgl *sgl_vid = alloc->sgl; | 72 | struct nvgpu_sgt *sgt = &alloc->sgt; |
73 | void *sgl_vid = sgt->sgl; | ||
73 | 74 | ||
74 | while (sgl_vid) { | 75 | while (sgl_vid) { |
75 | chunk_align = 1ULL << | 76 | chunk_align = 1ULL << |
76 | __ffs(nvgpu_mem_sgl_phys(sgl_vid) | | 77 | __ffs(nvgpu_sgt_get_phys(sgt, sgl_vid)) | |
77 | nvgpu_mem_sgl_length(sgl_vid)); | 78 | nvgpu_sgt_get_length(sgt, sgl_vid); |
78 | 79 | ||
79 | if (align) | 80 | if (align) |
80 | align = min(align, chunk_align); | 81 | align = min(align, chunk_align); |
81 | else | 82 | else |
82 | align = chunk_align; | 83 | align = chunk_align; |
83 | 84 | ||
84 | sgl_vid = nvgpu_mem_sgl_next(sgl_vid); | 85 | sgl_vid = nvgpu_sgt_get_next(sgt, sgl_vid); |
85 | } | 86 | } |
86 | 87 | ||
87 | return align; | 88 | return align; |
@@ -242,7 +243,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
242 | struct nvgpu_vm_area *vm_area = NULL; | 243 | struct nvgpu_vm_area *vm_area = NULL; |
243 | u32 ctag_offset; | 244 | u32 ctag_offset; |
244 | enum nvgpu_aperture aperture; | 245 | enum nvgpu_aperture aperture; |
245 | struct nvgpu_mem_sgl *nvgpu_sgl; | 246 | struct nvgpu_sgt *nvgpu_sgt; |
246 | 247 | ||
247 | /* | 248 | /* |
248 | * The kind used as part of the key for map caching. HW may | 249 | * The kind used as part of the key for map caching. HW may |
@@ -399,12 +400,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
399 | ctag_offset += buffer_offset >> | 400 | ctag_offset += buffer_offset >> |
400 | ilog2(g->ops.fb.compression_page_size(g)); | 401 | ilog2(g->ops.fb.compression_page_size(g)); |
401 | 402 | ||
402 | nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt); | 403 | nvgpu_sgt = nvgpu_linux_sgt_create(g, bfr.sgt); |
403 | 404 | ||
404 | /* update gmmu ptes */ | 405 | /* update gmmu ptes */ |
405 | map_offset = g->ops.mm.gmmu_map(vm, | 406 | map_offset = g->ops.mm.gmmu_map(vm, |
406 | map_offset, | 407 | map_offset, |
407 | nvgpu_sgl, | 408 | nvgpu_sgt, |
408 | buffer_offset, /* sg offset */ | 409 | buffer_offset, /* sg offset */ |
409 | mapping_size, | 410 | mapping_size, |
410 | bfr.pgsz_idx, | 411 | bfr.pgsz_idx, |
@@ -419,7 +420,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
419 | if (!map_offset) | 420 | if (!map_offset) |
420 | goto clean_up; | 421 | goto clean_up; |
421 | 422 | ||
422 | nvgpu_mem_sgl_free(g, nvgpu_sgl); | 423 | nvgpu_sgt_free(nvgpu_sgt, g); |
423 | 424 | ||
424 | mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); | 425 | mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); |
425 | if (!mapped_buffer) { | 426 | if (!mapped_buffer) { |