author    Sunny He <suhe@nvidia.com>  2017-08-15 15:01:04 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>  2017-09-22 15:55:24 -0400
commit    17c581d75514c32d1e8c1e416beb33b3ccce22a5 (patch)
tree      a25d063f19b8e1f83f61af418f3aa2ac32fe0cce /drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
parent    0090ee5aca268a3c359f34c74b8c521df3bd8593 (diff)
gpu: nvgpu: SGL passthrough implementation

The basic nvgpu_mem_sgl implementation provides support for OS-specific
scatter-gather list implementations by simply copying them node by node.
This is inefficient, taking extra time and memory.

This patch implements an nvgpu_sgt struct to act as a header which is
inserted at the front of any scatter-gather list implementation. This
labels every struct with a set of ops which can be used to interact with
the attached scatter-gather list.

Since nvgpu common code only has to interact with these function
pointers, any SGL implementation can be used. Initialization only
requires the allocation of a single struct, removing the need to copy or
iterate through the SGL being converted.

Jira NVGPU-186

Change-Id: I2994f804a4a4cc141b702e987e9081d8560ba2e8
Signed-off-by: Sunny He <suhe@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1541426
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
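
For orientation, the ops table added in the diff below implies a header
struct along these lines. This is a minimal sketch reconstructed from this
patch alone; the real definitions live in the nvgpu headers, and the exact
field order and comments here are assumptions:

/*
 * Sketch, not the verbatim nvgpu header: an nvgpu_sgt is a small
 * header pairing an opaque SGL head pointer with the ops used to
 * walk and query it.
 */
struct nvgpu_sgt_ops {
	void *(*sgl_next)(void *sgl);
	u64 (*sgl_phys)(void *sgl);
	u64 (*sgl_dma)(void *sgl);
	u64 (*sgl_length)(void *sgl);
	u64 (*sgl_gpu_addr)(struct gk20a *g, void *sgl,
			    struct nvgpu_gmmu_attrs *attrs);
	void (*sgt_free)(struct gk20a *g, struct nvgpu_sgt *sgt);
};

struct nvgpu_sgt {
	const struct nvgpu_sgt_ops *ops; /* backend-specific accessors */
	void *sgl;                       /* head of the attached SGL */
};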
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/nvgpu_mem.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 144
1 file changed, 61 insertions(+), 83 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index eb54f3fd..8d8909dd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -397,42 +397,59 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
 	return 0;
 }
 
-static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g,
-						 struct nvgpu_mem_sgl *sgl)
+static void *nvgpu_mem_linux_sgl_next(void *sgl)
 {
-	struct nvgpu_mem_sgl *head, *next;
+	return sg_next((struct scatterlist *)sgl);
+}
 
-	head = nvgpu_kzalloc(g, sizeof(*sgl));
-	if (!head)
-		return NULL;
+static u64 nvgpu_mem_linux_sgl_phys(void *sgl)
+{
+	return (u64)sg_phys((struct scatterlist *)sgl);
+}
 
-	next = head;
-	while (true) {
-		nvgpu_log(g, gpu_dbg_sgl,
-			  "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
-			  sgl->phys, sgl->dma, sgl->length);
+static u64 nvgpu_mem_linux_sgl_dma(void *sgl)
+{
+	return (u64)sg_dma_address((struct scatterlist *)sgl);
+}
-
-		next->dma    = sgl->dma;
-		next->phys   = sgl->phys;
-		next->length = sgl->length;
-		next->next   = NULL;
-
-		sgl = nvgpu_mem_sgl_next(sgl);
-		if (!sgl)
-			break;
-
-		next->next = nvgpu_kzalloc(g, sizeof(*sgl));
-		if (!next->next) {
-			nvgpu_mem_sgl_free(g, head);
-			return NULL;
-		}
-		next = next->next;
-	}
 
-	return head;
+static u64 nvgpu_mem_linux_sgl_length(void *sgl)
+{
+	return (u64)((struct scatterlist *)sgl)->length;
 }
 
-static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
+static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl,
+					struct nvgpu_gmmu_attrs *attrs)
+{
+	if (sg_dma_address((struct scatterlist *)sgl) == 0)
+		return g->ops.mm.gpu_phys_addr(g, attrs,
+				sg_phys((struct scatterlist *)sgl));
+
+	if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
+		return 0;
+
+	return gk20a_mm_smmu_vaddr_translate(g,
+			sg_dma_address((struct scatterlist *)sgl));
+}
+
+static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
+{
+	/*
+	 * Free this SGT. All we do is free the passed SGT. The actual Linux
+	 * SGT/SGL needs to be freed separately.
+	 */
+	nvgpu_kfree(g, sgt);
+}
+
+static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
+	.sgl_next = nvgpu_mem_linux_sgl_next,
+	.sgl_phys = nvgpu_mem_linux_sgl_phys,
+	.sgl_dma = nvgpu_mem_linux_sgl_dma,
+	.sgl_length = nvgpu_mem_linux_sgl_length,
+	.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
+	.sgt_free = nvgpu_mem_linux_sgl_free,
+};
+
+static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
 	struct gk20a *g,
 	struct scatterlist *linux_sgl)
 {
@@ -442,70 +459,31 @@ static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
 	if (!vidmem_alloc)
 		return NULL;
 
-	nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:");
-
-	return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
+	return &vidmem_alloc->sgt;
 }
 
-struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
-					   struct sg_table *sgt)
+struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
 {
-	struct nvgpu_mem_sgl *head, *sgl, *next;
+	struct nvgpu_sgt *nvgpu_sgt;
 	struct scatterlist *linux_sgl = sgt->sgl;
 
 	if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
-		return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl);
+		return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);
 
-	head = nvgpu_kzalloc(g, sizeof(*sgl));
-	if (!head)
+	nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
+	if (!nvgpu_sgt)
 		return NULL;
 
-	nvgpu_log(g, gpu_dbg_sgl, "Making sgl:");
+	nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");
 
-	sgl = head;
-	while (true) {
-		sgl->dma    = sg_dma_address(linux_sgl);
-		sgl->phys   = sg_phys(linux_sgl);
-		sgl->length = linux_sgl->length;
-
-		/*
-		 * We don't like offsets in the pages here. This will cause
-		 * problems.
-		 */
-		if (WARN_ON(linux_sgl->offset)) {
-			nvgpu_mem_sgl_free(g, head);
-			return NULL;
-		}
-
-		nvgpu_log(g, gpu_dbg_sgl,
-			  "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
-			  sgl->phys, sgl->dma, sgl->length);
-
-		/*
-		 * When there's no more SGL ents for the Linux SGL we are
-		 * done. Don't bother making any more SGL ents for the nvgpu
-		 * SGL.
-		 */
-		linux_sgl = sg_next(linux_sgl);
-		if (!linux_sgl)
-			break;
-
-		next = nvgpu_kzalloc(g, sizeof(*sgl));
-		if (!next) {
-			nvgpu_mem_sgl_free(g, head);
-			return NULL;
-		}
-
-		sgl->next = next;
-		sgl = next;
-	}
+	nvgpu_sgt->sgl = sgt->sgl;
+	nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;
 
-	nvgpu_log(g, gpu_dbg_sgl, "Done!");
-	return head;
+	return nvgpu_sgt;
 }
 
-struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
+struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
 				     struct nvgpu_mem *mem)
 {
-	return nvgpu_mem_sgl_create(g, mem->priv.sgt);
+	return nvgpu_linux_sgt_create(g, mem->priv.sgt);
 }
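
Because common code now reaches the SGL only through these function
pointers, traversal is backend-agnostic. A hypothetical illustration (the
helper name below is invented for this note and is not part of the patch;
only the ops calls come from the diff above):

/*
 * Hypothetical example: sum the lengths of all SGL entries by walking
 * the list purely through the nvgpu_sgt ops, never touching the
 * underlying scatterlist type.
 */
static u64 example_sgt_total_length(struct nvgpu_sgt *sgt)
{
	void *sgl;
	u64 total = 0;

	for (sgl = sgt->sgl; sgl; sgl = sgt->ops->sgl_next(sgl))
		total += sgt->ops->sgl_length(sgl);

	return total;
}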