author     Sunny He <suhe@nvidia.com>                              2017-08-15 15:01:04 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>     2017-09-22 15:55:24 -0400
commit     17c581d75514c32d1e8c1e416beb33b3ccce22a5 (patch)
tree       a25d063f19b8e1f83f61af418f3aa2ac32fe0cce
parent     0090ee5aca268a3c359f34c74b8c521df3bd8593 (diff)
gpu: nvgpu: SGL passthrough implementation
The basic nvgpu_mem_sgl implementation provides support for OS specific
scatter-gather list implementations by simply copying them node by node.
This is inefficient, taking extra time and memory.

This patch implements an nvgpu_mem_sgt struct to act as a header which is
inserted at the front of any scatter-gather list implementation. This labels
every struct with a set of ops which can be used to interact with the
attached scatter-gather list.

Since nvgpu common code only has to interact with these function pointers,
any sgl implementation can be used. Initialization only requires the
allocation of a single struct, removing the need to copy or iterate through
the sgl being converted.

Jira NVGPU-186

Change-Id: I2994f804a4a4cc141b702e987e9081d8560ba2e8
Signed-off-by: Sunny He <suhe@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1541426
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
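The key point is that the ops pointer travels with the scatter-gather list, so
common code never dereferences an OS specific node type. As a rough
illustration of the intended usage (this helper is not part of the patch; it
only uses the nvgpu_sgt accessors introduced below):

        static u64 example_sgt_total_bytes(struct nvgpu_sgt *sgt)
        {
                u64 total = 0;
                void *sgl;

                /*
                 * Walk the attached SGL through the ops header, regardless of
                 * whether it is backed by a Linux scatterlist or a vidmem
                 * page allocator chain.
                 */
                for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl))
                        total += nvgpu_sgt_get_length(sgt, sgl);

                return total;
        }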
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c         | 144
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c                |  17
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c                 |  67
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_mem.c            |  47
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c       | 114
-rw-r--r--  drivers/gpu/nvgpu/common/pramin.c                  |  21
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h                    |   8
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c                 |  12
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h                 |   2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pramin_gk20a.c             |  10
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pramin_gk20a.h             |   4
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h  |   7
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h        |  80
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/page_allocator.h   |   4
-rw-r--r--  drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c       |  19
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c                   |   4
16 files changed, 320 insertions, 240 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index eb54f3fd..8d8909dd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -397,42 +397,59 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
 	return 0;
 }
 
-static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g,
-		struct nvgpu_mem_sgl *sgl)
-{
-	struct nvgpu_mem_sgl *head, *next;
-
-	head = nvgpu_kzalloc(g, sizeof(*sgl));
-	if (!head)
-		return NULL;
-
-	next = head;
-	while (true) {
-		nvgpu_log(g, gpu_dbg_sgl,
-			  " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
-			  sgl->phys, sgl->dma, sgl->length);
-
-		next->dma = sgl->dma;
-		next->phys = sgl->phys;
-		next->length = sgl->length;
-		next->next = NULL;
-
-		sgl = nvgpu_mem_sgl_next(sgl);
-		if (!sgl)
-			break;
-
-		next->next = nvgpu_kzalloc(g, sizeof(*sgl));
-		if (!next->next) {
-			nvgpu_mem_sgl_free(g, head);
-			return NULL;
-		}
-		next = next->next;
-	}
-
-	return head;
-}
-
-static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
+static void *nvgpu_mem_linux_sgl_next(void *sgl)
+{
+	return sg_next((struct scatterlist *)sgl);
+}
+
+static u64 nvgpu_mem_linux_sgl_phys(void *sgl)
+{
+	return (u64)sg_phys((struct scatterlist *)sgl);
+}
+
+static u64 nvgpu_mem_linux_sgl_dma(void *sgl)
+{
+	return (u64)sg_dma_address((struct scatterlist *)sgl);
+}
+
+static u64 nvgpu_mem_linux_sgl_length(void *sgl)
+{
+	return (u64)((struct scatterlist *)sgl)->length;
+}
+
+static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl,
+					struct nvgpu_gmmu_attrs *attrs)
+{
+	if (sg_dma_address((struct scatterlist *)sgl) == 0)
+		return g->ops.mm.gpu_phys_addr(g, attrs,
+				sg_phys((struct scatterlist *)sgl));
+
+	if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
+		return 0;
+
+	return gk20a_mm_smmu_vaddr_translate(g,
+			sg_dma_address((struct scatterlist *)sgl));
+}
+
+static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
+{
+	/*
+	 * Free this SGT. All we do is free the passed SGT. The actual Linux
+	 * SGT/SGL needs to be freed separately.
+	 */
+	nvgpu_kfree(g, sgt);
+}
+
+static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
+	.sgl_next = nvgpu_mem_linux_sgl_next,
+	.sgl_phys = nvgpu_mem_linux_sgl_phys,
+	.sgl_dma = nvgpu_mem_linux_sgl_dma,
+	.sgl_length = nvgpu_mem_linux_sgl_length,
+	.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
+	.sgt_free = nvgpu_mem_linux_sgl_free,
+};
+
+static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
 		struct gk20a *g,
 		struct scatterlist *linux_sgl)
 {
@@ -442,70 +459,31 @@ static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
 	if (!vidmem_alloc)
 		return NULL;
 
-	nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:");
-
-	return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
+	return &vidmem_alloc->sgt;
 }
 
-struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
-		struct sg_table *sgt)
+struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
 {
-	struct nvgpu_mem_sgl *head, *sgl, *next;
+	struct nvgpu_sgt *nvgpu_sgt;
 	struct scatterlist *linux_sgl = sgt->sgl;
 
 	if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
-		return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl);
+		return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);
 
-	head = nvgpu_kzalloc(g, sizeof(*sgl));
-	if (!head)
+	nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
+	if (!nvgpu_sgt)
 		return NULL;
 
-	nvgpu_log(g, gpu_dbg_sgl, "Making sgl:");
+	nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");
 
-	sgl = head;
-	while (true) {
-		sgl->dma = sg_dma_address(linux_sgl);
-		sgl->phys = sg_phys(linux_sgl);
-		sgl->length = linux_sgl->length;
-
-		/*
-		 * We don't like offsets in the pages here. This will cause
-		 * problems.
-		 */
-		if (WARN_ON(linux_sgl->offset)) {
-			nvgpu_mem_sgl_free(g, head);
-			return NULL;
-		}
-
-		nvgpu_log(g, gpu_dbg_sgl,
-			  " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
-			  sgl->phys, sgl->dma, sgl->length);
-
-		/*
-		 * When there's no more SGL ents for the Linux SGL we are
-		 * done. Don't bother making any more SGL ents for the nvgpu
-		 * SGL.
-		 */
-		linux_sgl = sg_next(linux_sgl);
-		if (!linux_sgl)
-			break;
-
-		next = nvgpu_kzalloc(g, sizeof(*sgl));
-		if (!next) {
-			nvgpu_mem_sgl_free(g, head);
-			return NULL;
-		}
-
-		sgl->next = next;
-		sgl = next;
-	}
+	nvgpu_sgt->sgl = sgt->sgl;
+	nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;
 
-	nvgpu_log(g, gpu_dbg_sgl, "Done!");
-	return head;
+	return nvgpu_sgt;
 }
 
-struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
+struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
 		struct nvgpu_mem *mem)
 {
-	return nvgpu_mem_sgl_create(g, mem->priv.sgt);
+	return nvgpu_linux_sgt_create(g, mem->priv.sgt);
 }
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 4a4429dc..2e29f0f7 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -69,19 +69,20 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
 
 	if (aperture == APERTURE_VIDMEM) {
 		struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
-		struct nvgpu_mem_sgl *sgl_vid = alloc->sgl;
+		struct nvgpu_sgt *sgt = &alloc->sgt;
+		void *sgl_vid = sgt->sgl;
 
 		while (sgl_vid) {
 			chunk_align = 1ULL <<
-				__ffs(nvgpu_mem_sgl_phys(sgl_vid) |
-				      nvgpu_mem_sgl_length(sgl_vid));
+				__ffs(nvgpu_sgt_get_phys(sgt, sgl_vid)) |
+				nvgpu_sgt_get_length(sgt, sgl_vid);
 
 			if (align)
 				align = min(align, chunk_align);
 			else
 				align = chunk_align;
 
-			sgl_vid = nvgpu_mem_sgl_next(sgl_vid);
+			sgl_vid = nvgpu_sgt_get_next(sgt, sgl_vid);
 		}
 
 		return align;
@@ -242,7 +243,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	struct nvgpu_vm_area *vm_area = NULL;
 	u32 ctag_offset;
 	enum nvgpu_aperture aperture;
-	struct nvgpu_mem_sgl *nvgpu_sgl;
+	struct nvgpu_sgt *nvgpu_sgt;
 
 	/*
 	 * The kind used as part of the key for map caching. HW may
@@ -399,12 +400,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 		ctag_offset += buffer_offset >>
 			       ilog2(g->ops.fb.compression_page_size(g));
 
-	nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt);
+	nvgpu_sgt = nvgpu_linux_sgt_create(g, bfr.sgt);
 
 	/* update gmmu ptes */
 	map_offset = g->ops.mm.gmmu_map(vm,
 					map_offset,
-					nvgpu_sgl,
+					nvgpu_sgt,
 					buffer_offset, /* sg offset */
 					mapping_size,
 					bfr.pgsz_idx,
@@ -419,7 +420,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	if (!map_offset)
 		goto clean_up;
 
-	nvgpu_mem_sgl_free(g, nvgpu_sgl);
+	nvgpu_sgt_free(nvgpu_sgt, g);
 
 	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
 	if (!mapped_buffer) {
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 41f5acdd..66bce8f0 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -65,14 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 vaddr;
 
-	struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem);
+	struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem);
 
-	if (!sgl)
+	if (!sgt)
 		return -ENOMEM;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 	vaddr = g->ops.mm.gmmu_map(vm, addr,
-				   sgl,  /* sg list */
+				   sgt,  /* sg list */
 				   0,    /* sg offset */
 				   size,
 				   gmmu_page_size_kernel,
@@ -86,7 +86,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 				   aperture);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 
-	nvgpu_mem_sgl_free(g, sgl);
+	nvgpu_sgt_free(sgt, g);
 
 	if (!vaddr) {
 		nvgpu_err(g, "failed to map buffer!");
@@ -464,7 +464,7 @@ static int __set_pd_level(struct vm_gk20a *vm,
  * VIDMEM version of the update_ptes logic.
  */
 static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
-					struct nvgpu_mem_sgl *sgl,
+					struct nvgpu_sgt *sgt,
 					u64 space_to_skip,
 					u64 virt_addr,
 					u64 length,
@@ -472,8 +472,9 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
 {
 	u64 phys_addr, chunk_length;
 	int err = 0;
+	void *sgl;
 
-	if (!sgl) {
+	if (!sgt) {
 		/*
 		 * This is considered an unmap. Just pass in 0 as the physical
 		 * address for the entire GPU range.
@@ -490,16 +491,17 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
 	 * Otherwise iterate across all the chunks in this allocation and
 	 * map them.
 	 */
+	sgl = sgt->sgl;
 	while (sgl) {
 		if (space_to_skip &&
-		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
-			space_to_skip -= nvgpu_mem_sgl_length(sgl);
-			sgl = nvgpu_mem_sgl_next(sgl);
+		    space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
+			space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
+			sgl = nvgpu_sgt_get_next(sgt, sgl);
 			continue;
 		}
 
-		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
-		chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) -
+		phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
+		chunk_length = min(length, (nvgpu_sgt_get_length(sgt, sgl) -
 					    space_to_skip));
 
 		err = __set_pd_level(vm, &vm->pdb,
@@ -518,27 +520,27 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
 		 */
 		virt_addr += chunk_length;
 		length -= chunk_length;
+		sgl = nvgpu_sgt_get_next(sgt, sgl);
 
 		if (length == 0)
 			break;
-
-		sgl = nvgpu_mem_sgl_next(sgl);
 	}
 
 	return err;
 }
 
 static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
-					struct nvgpu_mem_sgl *sgl,
+					struct nvgpu_sgt *sgt,
 					u64 space_to_skip,
 					u64 virt_addr,
 					u64 length,
 					struct nvgpu_gmmu_attrs *attrs)
 {
-	int err;
 	struct gk20a *g = gk20a_from_vm(vm);
+	void *sgl;
+	int err;
 
-	if (!sgl) {
+	if (!sgt) {
 		/*
 		 * This is considered an unmap. Just pass in 0 as the physical
 		 * address for the entire GPU range.
@@ -559,8 +561,10 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
 	 * mapping is simple since the "physical" address is actually a virtual
 	 * IO address and will be contiguous.
 	 */
+	sgl = sgt->sgl;
+
 	if (!g->mm.bypass_smmu) {
-		u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs);
+		u64 io_addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgl, attrs);
 
 		io_addr += space_to_skip;
 
@@ -586,15 +590,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
 		 * Cut out sgl ents for space_to_skip.
 		 */
 		if (space_to_skip &&
-		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
-			space_to_skip -= nvgpu_mem_sgl_length(sgl);
-			sgl = nvgpu_mem_sgl_next(sgl);
+		    space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
+			space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
+			sgl = nvgpu_sgt_get_next(sgt, sgl);
 			continue;
 		}
 
-		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+		phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
 		chunk_length = min(length,
-				   nvgpu_mem_sgl_length(sgl) - space_to_skip);
+				   nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
 
 		err = __set_pd_level(vm, &vm->pdb,
 				     0,
@@ -606,7 +610,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
 		space_to_skip = 0;
 		virt_addr += chunk_length;
 		length -= chunk_length;
-		sgl = nvgpu_mem_sgl_next(sgl);
+		sgl = nvgpu_sgt_get_next(sgt, sgl);
 
 		if (length == 0)
 			break;
@@ -631,7 +635,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
  * case of SMMU usage.
  */
 static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
-					  struct nvgpu_mem_sgl *sgl,
+					  struct nvgpu_sgt *sgt,
 					  u64 space_to_skip,
 					  u64 virt_addr,
 					  u64 length,
@@ -669,10 +673,10 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 		   "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
 		   "kind=%#02x APT=%-6s %c%c%c%c%c",
 		   vm->name,
-		   sgl ? "MAP" : "UNMAP",
+		   sgt ? "MAP" : "UNMAP",
 		   virt_addr,
 		   length,
-		   sgl ? nvgpu_mem_sgl_phys(sgl) : 0,
+		   sgt ? nvgpu_sgt_get_phys(sgt, sgt->sgl) : 0,
 		   space_to_skip,
 		   page_size >> 10,
 		   nvgpu_gmmu_perm_str(attrs->rw_flag),
@@ -690,14 +694,14 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 	 */
 	if (attrs->aperture == APERTURE_VIDMEM)
 		err = __nvgpu_gmmu_update_page_table_vidmem(vm,
-							    sgl,
+							    sgt,
 							    space_to_skip,
 							    virt_addr,
 							    length,
 							    attrs);
 	else
 		err = __nvgpu_gmmu_update_page_table_sysmem(vm,
-							    sgl,
+							    sgt,
 							    space_to_skip,
 							    virt_addr,
 							    length,
@@ -706,7 +710,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 	unmap_gmmu_pages(g, &vm->pdb);
 	nvgpu_smp_mb();
 
-	__gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP");
+	__gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");
 
 	return err;
 }
@@ -725,7 +729,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
  */
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			  u64 vaddr,
-			  struct nvgpu_mem_sgl *sgl,
+			  struct nvgpu_sgt *sgt,
 			  u64 buffer_offset,
 			  u64 size,
 			  int pgsz_idx,
@@ -774,7 +778,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		allocated = true;
 	}
 
-	err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset,
+	err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset,
 					     vaddr, size, &attrs);
 	if (err) {
 		nvgpu_err(g, "failed to update ptes on map");
@@ -787,6 +791,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		batch->need_tlb_invalidate = true;
 
 	return vaddr;
+
 fail_validate:
 	if (allocated)
 		__nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 7296c673..6decec24 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -19,55 +19,34 @@
 
 #include "gk20a/gk20a.h"
 
-struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl)
+void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
 {
-	return sgl->next;
+	return sgt->ops->sgl_next(sgl);
 }
 
-u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl)
+u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl)
 {
-	return sgl->phys;
+	return sgt->ops->sgl_phys(sgl);
 }
 
-u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl)
+u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl)
 {
-	return sgl->dma;
+	return sgt->ops->sgl_dma(sgl);
 }
 
-u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl)
+u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl)
 {
-	return sgl->length;
+	return sgt->ops->sgl_length(sgl);
 }
 
-/*
- * This builds a GPU address for the %sgl based on whether an IOMMU is present
- * or not. It also handles turning the physical address into the true GPU
- * physical address that should be programmed into the page tables.
- */
-u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
+u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl,
 			   struct nvgpu_gmmu_attrs *attrs)
 {
-	if (nvgpu_mem_sgl_dma(sgl) == 0)
-		return g->ops.mm.gpu_phys_addr(g, attrs,
-					       nvgpu_mem_sgl_phys(sgl));
-
-	if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE)
-		return 0;
-
-	return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl));
+	return sgt->ops->sgl_gpu_addr(g, sgl, attrs);
 }
 
-void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl)
+void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g)
 {
-	struct nvgpu_mem_sgl *next;
-
-	/*
-	 * Free each of the elements. We expect each element to have been
-	 * nvgpu_k[mz]alloc()ed.
-	 */
-	while (sgl) {
-		next = nvgpu_mem_sgl_next(sgl);
-		nvgpu_kfree(g, sgl);
-		sgl = next;
-	}
+	if (sgt && sgt->ops->sgt_free)
+		sgt->ops->sgt_free(g, sgt);
 }
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 6d92b457..9c35f528 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -143,20 +143,93 @@ static void nvgpu_page_release_co(struct nvgpu_allocator *a,
 	nvgpu_alloc_release_carveout(&va->source_allocator, co);
 }
 
+static void *nvgpu_page_alloc_sgl_next(void *sgl)
+{
+	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
+
+	return nvgpu_sgl->next;
+}
+
+static u64 nvgpu_page_alloc_sgl_phys(void *sgl)
+{
+	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
+
+	return nvgpu_sgl->phys;
+}
+
+static u64 nvgpu_page_alloc_sgl_dma(void *sgl)
+{
+	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
+
+	return nvgpu_sgl->dma;
+}
+
+static u64 nvgpu_page_alloc_sgl_length(void *sgl)
+{
+	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
+
+	return nvgpu_sgl->length;
+}
+
+static u64 nvgpu_page_alloc_sgl_gpu_addr(struct gk20a *g, void *sgl,
+					 struct nvgpu_gmmu_attrs *attrs)
+{
+	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
+
+	return nvgpu_sgl->phys;
+}
+
+static void nvgpu_page_alloc_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
+{
+	/*
+	 * No-op here. The free is handled by the page_alloc free() functions.
+	 */
+}
+
+/*
+ * These implement the generic scatter gather ops for pages allocated
+ * by the page allocator. however, the primary aim for this, is of course,
+ * vidmem.
+ */
+static const struct nvgpu_sgt_ops page_alloc_sgl_ops = {
+	.sgl_next = nvgpu_page_alloc_sgl_next,
+	.sgl_phys = nvgpu_page_alloc_sgl_phys,
+	.sgl_dma = nvgpu_page_alloc_sgl_dma,
+	.sgl_length = nvgpu_page_alloc_sgl_length,
+	.sgl_gpu_addr = nvgpu_page_alloc_sgl_gpu_addr,
+	.sgt_free = nvgpu_page_alloc_sgt_free,
+};
+
+/*
+ * This actually frees the sgl memory. Used by the page_alloc free() functions.
+ */
+static void nvgpu_page_alloc_sgl_proper_free(struct gk20a *g,
+					     struct nvgpu_mem_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *next;
+
+	while (sgl) {
+		next = sgl->next;
+		nvgpu_kfree(g, sgl);
+		sgl = next;
+	}
+}
+
 static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
 			       struct nvgpu_page_alloc *alloc,
 			       bool free_buddy_alloc)
 {
-	struct nvgpu_mem_sgl *sgl = alloc->sgl;
+	struct nvgpu_mem_sgl *sgl = alloc->sgt.sgl;
 
 	if (free_buddy_alloc) {
 		while (sgl) {
-			nvgpu_free(&a->source_allocator, sgl->phys);
-			sgl = nvgpu_mem_sgl_next(sgl);
+			nvgpu_free(&a->source_allocator,
+				   nvgpu_sgt_get_phys(&alloc->sgt, sgl));
+			sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
 		}
 	}
 
-	nvgpu_mem_sgl_free(a->owner->g, alloc->sgl);
+	nvgpu_page_alloc_sgl_proper_free(a->owner->g, sgl);
 	nvgpu_kmem_cache_free(a->alloc_cache, alloc);
 }
 
@@ -306,7 +379,7 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a,
 	alloc->length = slab_page->slab_size;
 	alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
 
-	sgl = alloc->sgl;
+	sgl = alloc->sgt.sgl;
 	sgl->phys = alloc->base;
 	sgl->dma = alloc->base;
 	sgl->length = alloc->length;
@@ -338,13 +411,16 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
 		palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
 		goto fail;
 	}
+
+	alloc->sgt.ops = &page_alloc_sgl_ops;
+
 	sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
 	if (!sgl) {
 		palloc_dbg(a, "OOM: could not alloc sgl struct!\n");
 		goto fail;
 	}
 
-	alloc->sgl = sgl;
+	alloc->sgt.sgl = sgl;
 	err = __do_slab_alloc(a, slab, alloc);
 	if (err)
 		goto fail;
@@ -432,6 +508,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 	memset(alloc, 0, sizeof(*alloc));
 
 	alloc->length = pages << a->page_shift;
+	alloc->sgt.ops = &page_alloc_sgl_ops;
 
 	while (pages) {
 		u64 chunk_addr = 0;
@@ -495,7 +572,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 		if (prev_sgl)
 			prev_sgl->next = sgl;
 		else
-			alloc->sgl = sgl;
+			alloc->sgt.sgl = sgl;
 
 		prev_sgl = sgl;
 
@@ -503,12 +580,12 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 	}
 
 	alloc->nr_chunks = i;
-	alloc->base = alloc->sgl->phys;
+	alloc->base = ((struct nvgpu_mem_sgl *)alloc->sgt.sgl)->phys;
 
 	return alloc;
 
 fail_cleanup:
-	sgl = alloc->sgl;
+	sgl = alloc->sgt.sgl;
 	while (sgl) {
 		struct nvgpu_mem_sgl *next = sgl->next;
 
@@ -542,13 +619,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
 
 	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
 		   pages << a->page_shift, pages, alloc->base);
-	sgl = alloc->sgl;
+	sgl = alloc->sgt.sgl;
 	while (sgl) {
 		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
 			   i++,
-			   nvgpu_mem_sgl_phys(sgl),
-			   nvgpu_mem_sgl_length(sgl));
-		sgl = sgl->next;
+			   nvgpu_sgt_get_phys(&alloc->sgt, sgl),
+			   nvgpu_sgt_get_length(&alloc->sgt, sgl));
+		sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
 	}
 	palloc_dbg(a, "Alloc done\n");
 
@@ -655,6 +732,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
 	if (!alloc || !sgl)
 		goto fail;
 
+	alloc->sgt.ops = &page_alloc_sgl_ops;
 	alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
 	if (!alloc->base) {
 		WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base);
@@ -663,7 +741,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
 
 	alloc->nr_chunks = 1;
 	alloc->length = length;
-	alloc->sgl = sgl;
+	alloc->sgt.sgl = sgl;
 
 	sgl->phys = alloc->base;
 	sgl->dma = alloc->base;
@@ -708,13 +786,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 
 	palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
 		   alloc->base, aligned_len, pages);
-	sgl = alloc->sgl;
+	sgl = alloc->sgt.sgl;
 	while (sgl) {
 		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
 			   i++,
-			   nvgpu_mem_sgl_phys(sgl),
-			   nvgpu_mem_sgl_length(sgl));
-		sgl = sgl->next;
+			   nvgpu_sgt_get_phys(&alloc->sgt, sgl),
+			   nvgpu_sgt_get_length(&alloc->sgt, sgl));
+		sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
 	}
 
 	a->nr_fixed_allocs++;
diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c
index bb7d930e..ae9c9b1f 100644
--- a/drivers/gpu/nvgpu/common/pramin.c
+++ b/drivers/gpu/nvgpu/common/pramin.c
@@ -84,24 +84,23 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
 		u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
 {
 	struct nvgpu_page_alloc *alloc = NULL;
-	struct nvgpu_mem_sgl *sgl;
+	struct nvgpu_sgt *sgt;
+	void *sgl;
 	u32 byteoff, start_reg, until_end, n;
 
 	alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
-	sgl = alloc->sgl;
-	while (sgl) {
-		if (offset >= nvgpu_mem_sgl_length(sgl)) {
-			offset -= nvgpu_mem_sgl_length(sgl);
-			sgl = sgl->next;
-		} else {
+	sgt = &alloc->sgt;
+	for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl)) {
+		if (offset >= nvgpu_sgt_get_length(sgt, sgl))
+			offset -= nvgpu_sgt_get_length(sgt, sgl);
+		else
 			break;
-		}
 	}
 
 	while (size) {
-		u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl);
+		u32 sgl_len = (u32)nvgpu_sgt_get_length(sgt, sgl);
 
-		byteoff = g->ops.pramin.enter(g, mem, sgl,
+		byteoff = g->ops.pramin.enter(g, mem, sgt, sgl,
 					      offset / sizeof(u32));
 		start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32));
 		until_end = SZ_1M - (byteoff & (SZ_1M - 1));
@@ -117,7 +116,7 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
 		size -= n;
 
 		if (n == (sgl_len - offset)) {
-			sgl = nvgpu_mem_sgl_next(sgl);
+			sgl = nvgpu_sgt_get_next(sgt, sgl);
 			offset = 0;
 		} else {
 			offset += n;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 355228db..13c62691 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -34,7 +34,7 @@ struct gk20a_debug_output;
 struct nvgpu_clk_pll_debug_data;
 struct nvgpu_nvhost_dev;
 struct nvgpu_cpu_time_correlation_sample;
-struct nvgpu_mem_sgl;
+struct nvgpu_mem_sgt;
 
 #include <nvgpu/lock.h>
 #include <nvgpu/thread.h>
@@ -700,7 +700,7 @@ struct gpu_ops {
 		bool (*support_sparse)(struct gk20a *g);
 		u64 (*gmmu_map)(struct vm_gk20a *vm,
 				u64 map_offset,
-				struct nvgpu_mem_sgl *sgl,
+				struct nvgpu_sgt *sgt,
 				u64 buffer_offset,
 				u64 size,
 				int pgsz_idx,
@@ -760,9 +760,9 @@ struct gpu_ops {
 			      size_t size);
 	struct {
 		u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem,
-			     struct nvgpu_mem_sgl *sgl, u32 w);
+			     struct nvgpu_sgt *sgt, void *sgl, u32 w);
 		void (*exit)(struct gk20a *g, struct nvgpu_mem *mem,
-			     struct nvgpu_mem_sgl *sgl);
+			     void *sgl);
 		u32 (*data032_r)(u32 i);
 	} pramin;
 	struct {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index cd34e769..0e0326dd 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1151,7 +1151,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 	struct gk20a_fence *gk20a_fence_out = NULL;
 	struct gk20a_fence *gk20a_last_fence = NULL;
 	struct nvgpu_page_alloc *alloc = NULL;
-	struct nvgpu_mem_sgl *sgl = NULL;
+	struct nvgpu_sgt *sgt = NULL;
+	void *sgl = NULL;
 	int err = 0;
 
 	if (g->mm.vidmem.ce_ctx_id == (u32)~0)
@@ -1159,7 +1160,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 
 	alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
 
-	sgl = alloc->sgl;
+	sgt = &alloc->sgt;
+	sgl = sgt->sgl;
 	while (sgl) {
 		if (gk20a_last_fence)
 			gk20a_fence_put(gk20a_last_fence);
@@ -1167,8 +1169,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 		err = gk20a_ce_execute_ops(g,
 				g->mm.vidmem.ce_ctx_id,
 				0,
-				nvgpu_mem_sgl_phys(sgl),
-				nvgpu_mem_sgl_length(sgl),
+				nvgpu_sgt_get_phys(sgt, sgl),
+				nvgpu_sgt_get_length(sgt, sgl),
 				0x00000000,
 				NVGPU_CE_DST_LOCATION_LOCAL_FB,
 				NVGPU_CE_MEMSET,
@@ -1183,7 +1185,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 		}
 
 		gk20a_last_fence = gk20a_fence_out;
-		sgl = nvgpu_mem_sgl_next(sgl);
+		sgl = nvgpu_sgt_get_next(sgt, sgl);
 	}
 
 	if (gk20a_last_fence) {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 2fdc1729..9c5e0fae 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -361,7 +361,7 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem)
 
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			  u64 map_offset,
-			  struct nvgpu_mem_sgl *sgl,
+			  struct nvgpu_sgt *sgt,
 			  u64 buffer_offset,
 			  u64 size,
 			  int pgsz_idx,
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
index 8a34a63c..aaba4ffc 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
@@ -26,9 +26,9 @@
 
 /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
 u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
-		       struct nvgpu_mem_sgl *sgl, u32 w)
+		       struct nvgpu_sgt *sgt, void *sgl, u32 w)
 {
-	u64 bufbase = nvgpu_mem_sgl_phys(sgl);
+	u64 bufbase = nvgpu_sgt_get_phys(sgt, sgl);
 	u64 addr = bufbase + w * sizeof(u32);
 	u32 hi = (u32)((addr & ~(u64)0xfffff)
 		>> bus_bar0_window_target_bar0_window_base_shift_v());
@@ -41,8 +41,8 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
 	gk20a_dbg(gpu_dbg_mem,
 			"0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)",
 			hi, lo, mem, sgl, bufbase,
-			bufbase + nvgpu_mem_sgl_phys(sgl),
-			nvgpu_mem_sgl_length(sgl));
+			bufbase + nvgpu_sgt_get_phys(sgt, sgl),
+			nvgpu_sgt_get_length(sgt, sgl));
 
 	WARN_ON(!bufbase);
 
@@ -58,7 +58,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
 }
 
 void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem,
-		       struct nvgpu_mem_sgl *sgl)
+		       void *sgl)
 {
 	gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, sgl);
 
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
index fc5ba919..29e76978 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
@@ -22,7 +22,7 @@ struct nvgpu_mem;
 struct nvgpu_mem_sgl;
 
 u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
-		       struct nvgpu_mem_sgl *sgl, u32 w);
+		       struct nvgpu_sgt *sgt, void *sgl, u32 w);
 void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem,
-		       struct nvgpu_mem_sgl *sgl);
+		       void *sgl);
 #endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h
index f96c2801..517d834c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h
@@ -20,6 +20,7 @@
 struct page;
 struct sg_table;
 struct scatterlist;
+struct nvgpu_sgt;
 
 struct gk20a;
 struct nvgpu_mem;
@@ -32,9 +33,11 @@ struct nvgpu_mem_priv {
 };
 
 u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl);
-struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
+struct nvgpu_sgt *nvgpu_mem_linux_sgt_create(struct gk20a *g,
+					     struct sg_table *sgt);
+void nvgpu_mem_linux_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt);
+struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g,
 					 struct sg_table *sgt);
-
 /**
  * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages.
  *
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 7d19cf81..beffbfe8 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -46,12 +46,41 @@ enum nvgpu_aperture {
 	APERTURE_VIDMEM
 };
 
+struct nvgpu_sgt_ops {
+	void *(*sgl_next)(void *sgl);
+	u64   (*sgl_phys)(void *sgl);
+	u64   (*sgl_dma)(void *sgl);
+	u64   (*sgl_length)(void *sgl);
+	u64   (*sgl_gpu_addr)(struct gk20a *g, void *sgl,
+			      struct nvgpu_gmmu_attrs *attrs);
+	/*
+	 * Note: this operates on the whole SGT not a specific SGL entry.
+	 */
+	void  (*sgt_free)(struct gk20a *g, struct nvgpu_sgt *sgt);
+};
+
+/*
+ * Scatter gather table: this is a list of scatter list entries and the ops for
+ * interacting with those entries.
+ */
+struct nvgpu_sgt {
+	/*
+	 * Ops for interacting with the underlying scatter gather list entries.
+	 */
+	const struct nvgpu_sgt_ops *ops;
+
+	/*
+	 * The first node in the scatter gather list.
+	 */
+	void *sgl;
+};
+
 /*
  * This struct holds the necessary information for describing a struct
  * nvgpu_mem's scatter gather list.
  *
- * These are created in a platform dependent way. As a result the function
- * definition for allocating these lives in the <nvgpu/_OS_/nvgpu_mem.h> file.
+ * Not all nvgpu_sgt's use this particular implementation. Nor is a given OS
+ * required to use this at all.
  */
 struct nvgpu_mem_sgl {
 	/*
@@ -164,6 +193,32 @@ static inline bool nvgpu_mem_is_valid(struct nvgpu_mem *mem)
 
 }
 
+/*
+ * Create a nvgpu_sgt of the default implementation
+ */
+struct nvgpu_sgt *nvgpu_sgt_create(struct gk20a *g);
+
+/**
+ * nvgpu_mem_sgt_create_from_mem - Create a scatter list from an nvgpu_mem.
+ *
+ * @g   - The GPU.
+ * @mem - The source memory allocation to use.
+ *
+ * Create a scatter gather table from the passed @mem struct. This list lets the
+ * calling code iterate across each chunk of a DMA allocation for when that DMA
+ * allocation is not completely contiguous.
+ */
+struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
+					    struct nvgpu_mem *mem);
+
+void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl);
+u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl);
+u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl);
+u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl);
+u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl,
+			   struct nvgpu_gmmu_attrs *attrs);
+void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g);
+
 /**
  * nvgpu_mem_create_from_mem - Create a new nvgpu_mem struct from an old one.
  *
@@ -200,27 +255,6 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
 		struct nvgpu_mem *dest, struct nvgpu_mem *src,
 		int start_page, int nr_pages);
 
-/**
- * nvgpu_mem_sgl_create_from_mem - Create a scatter list from an nvgpu_mem.
- *
- * @g   - The GPU.
- * @mem - The source memory allocation to use.
- *
- * Create a scatter gather list from the passed @mem struct. This list lets the
- * calling code iterate across each chunk of a DMA allocation for when that DMA
- * allocation is not completely contiguous.
- */
-struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
-						    struct nvgpu_mem *mem);
-void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl);
-
-struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl);
-u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl);
-u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl);
-u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl);
-u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
-			   struct nvgpu_gmmu_attrs *attrs);
-
 /*
  * Buffer accessors - wrap between begin() and end() if there is no permanent
  * kernel mapping for this buffer.
diff --git a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h
index de83ca7f..b22c55d0 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h
@@ -91,10 +91,10 @@ page_alloc_slab_page_from_list_entry(struct nvgpu_list_node *node)
  */
 struct nvgpu_page_alloc {
 	/*
-	 * nvgpu_mem_sgl for describing the actual allocation. Convenient for
+	 * nvgpu_sgt for describing the actual allocation. Convenient for
 	 * GMMU mapping.
 	 */
-	struct nvgpu_mem_sgl *sgl;
+	struct nvgpu_sgt sgt;
 
 	int nr_chunks;
 	u64 length;
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
index ee9b791a..d9324363 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -40,7 +40,7 @@ static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc,
 
 static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 				      u64 map_offset,
-				      struct nvgpu_mem_sgl *sgl,
+				      struct nvgpu_sgt *sgt,
 				      u64 buffer_offset,
 				      u64 size,
 				      int pgsz_idx,
@@ -66,12 +66,13 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 	void *handle = NULL;
 	size_t oob_size;
 	u8 prot;
+	void *sgl;
 
 	gk20a_dbg_fn("");
 
 	/* FIXME: add support for sparse mappings */
 
-	if (WARN_ON(!sgl) || WARN_ON(!g->mm.bypass_smmu))
+	if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu))
 		return 0;
 
 	if (space_to_skip & (page_size - 1))
@@ -97,7 +98,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 		err = -EINVAL;
 		goto fail;
 	}
-
+	sgl = sgt->sgl;
 	while (sgl) {
 		u64 phys_addr;
 		u64 chunk_length;
@@ -106,15 +107,15 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 		 * Cut out sgl ents for space_to_skip.
 		 */
 		if (space_to_skip &&
-		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
-			space_to_skip -= nvgpu_mem_sgl_length(sgl);
-			sgl = nvgpu_mem_sgl_next(sgl);
+		    space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
+			space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
+			sgl = nvgpu_sgt_get_next(sgt, sgl);
 			continue;
 		}
 
-		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+		phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
 		chunk_length = min(size,
-				   nvgpu_mem_sgl_length(sgl) - space_to_skip);
+				   nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
 
 		if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr,
 				 chunk_length, &oob_size)) {
@@ -124,7 +125,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 
 		space_to_skip = 0;
 		size -= chunk_length;
-		sgl = nvgpu_mem_sgl_next(sgl);
+		sgl = nvgpu_sgt_get_next(sgt, sgl);
 
 		if (size == 0)
 			break;
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 5da6f158..adb01ae5 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -78,7 +78,7 @@ int vgpu_init_mm_support(struct gk20a *g)
 
 static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 				u64 map_offset,
-				struct nvgpu_mem_sgl *sgl,
+				struct nvgpu_sgt *sgt,
 				u64 buffer_offset,
 				u64 size,
 				int pgsz_idx,
@@ -98,7 +98,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
-	u64 addr = nvgpu_mem_sgl_gpu_addr(g, sgl, NULL);
+	u64 addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgt->sgl, NULL);
 	u8 prot;
 
 	gk20a_dbg_fn("");