-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c         | 144
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c                |  17
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c                 |  67
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_mem.c            |  47
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c       | 114
-rw-r--r--  drivers/gpu/nvgpu/common/pramin.c                  |  21
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h                    |   8
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c                 |  12
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h                 |   2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pramin_gk20a.c             |  10
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pramin_gk20a.h             |   4
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h  |   7
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h        |  80
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/page_allocator.h   |   4
-rw-r--r--  drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c       |  19
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c                   |   4
16 files changed, 320 insertions, 240 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index eb54f3fd..8d8909dd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -397,42 +397,59 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
397 return 0; 397 return 0;
398} 398}
399 399
400static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g, 400static void *nvgpu_mem_linux_sgl_next(void *sgl)
401 struct nvgpu_mem_sgl *sgl)
402{ 401{
403 struct nvgpu_mem_sgl *head, *next; 402 return sg_next((struct scatterlist *)sgl);
403}
404 404
405 head = nvgpu_kzalloc(g, sizeof(*sgl)); 405static u64 nvgpu_mem_linux_sgl_phys(void *sgl)
406 if (!head) 406{
407 return NULL; 407 return (u64)sg_phys((struct scatterlist *)sgl);
408}
408 409
409 next = head; 410static u64 nvgpu_mem_linux_sgl_dma(void *sgl)
410 while (true) { 411{
411 nvgpu_log(g, gpu_dbg_sgl, 412 return (u64)sg_dma_address((struct scatterlist *)sgl);
412 " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx", 413}
413 sgl->phys, sgl->dma, sgl->length);
414
415 next->dma = sgl->dma;
416 next->phys = sgl->phys;
417 next->length = sgl->length;
418 next->next = NULL;
419
420 sgl = nvgpu_mem_sgl_next(sgl);
421 if (!sgl)
422 break;
423
424 next->next = nvgpu_kzalloc(g, sizeof(*sgl));
425 if (!next->next) {
426 nvgpu_mem_sgl_free(g, head);
427 return NULL;
428 }
429 next = next->next;
430 }
431 414
432 return head; 415static u64 nvgpu_mem_linux_sgl_length(void *sgl)
416{
417 return (u64)((struct scatterlist *)sgl)->length;
433} 418}
434 419
435static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem( 420static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl,
421 struct nvgpu_gmmu_attrs *attrs)
422{
423 if (sg_dma_address((struct scatterlist *)sgl) == 0)
424 return g->ops.mm.gpu_phys_addr(g, attrs,
425 sg_phys((struct scatterlist *)sgl));
426
427 if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
428 return 0;
429
430 return gk20a_mm_smmu_vaddr_translate(g,
431 sg_dma_address((struct scatterlist *)sgl));
432}
433
434static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
435{
436 /*
437 * Free this SGT. All we do is free the passed SGT. The actual Linux
438 * SGT/SGL needs to be freed separately.
439 */
440 nvgpu_kfree(g, sgt);
441}
442
443static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
444 .sgl_next = nvgpu_mem_linux_sgl_next,
445 .sgl_phys = nvgpu_mem_linux_sgl_phys,
446 .sgl_dma = nvgpu_mem_linux_sgl_dma,
447 .sgl_length = nvgpu_mem_linux_sgl_length,
448 .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
449 .sgt_free = nvgpu_mem_linux_sgl_free,
450};
451
452static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
436 struct gk20a *g, 453 struct gk20a *g,
437 struct scatterlist *linux_sgl) 454 struct scatterlist *linux_sgl)
438{ 455{
@@ -442,70 +459,31 @@ static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
442 if (!vidmem_alloc) 459 if (!vidmem_alloc)
443 return NULL; 460 return NULL;
444 461
445 nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:"); 462 return &vidmem_alloc->sgt;
446
447 return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
448} 463}
449 464
450struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g, 465struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
451 struct sg_table *sgt)
452{ 466{
453 struct nvgpu_mem_sgl *head, *sgl, *next; 467 struct nvgpu_sgt *nvgpu_sgt;
454 struct scatterlist *linux_sgl = sgt->sgl; 468 struct scatterlist *linux_sgl = sgt->sgl;
455 469
456 if (is_vidmem_page_alloc(sg_dma_address(linux_sgl))) 470 if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
457 return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl); 471 return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);
458 472
459 head = nvgpu_kzalloc(g, sizeof(*sgl)); 473 nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
460 if (!head) 474 if (!nvgpu_sgt)
461 return NULL; 475 return NULL;
462 476
463 nvgpu_log(g, gpu_dbg_sgl, "Making sgl:"); 477 nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");
464 478
465 sgl = head; 479 nvgpu_sgt->sgl = sgt->sgl;
466 while (true) { 480 nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;
467 sgl->dma = sg_dma_address(linux_sgl);
468 sgl->phys = sg_phys(linux_sgl);
469 sgl->length = linux_sgl->length;
470
471 /*
472 * We don't like offsets in the pages here. This will cause
473 * problems.
474 */
475 if (WARN_ON(linux_sgl->offset)) {
476 nvgpu_mem_sgl_free(g, head);
477 return NULL;
478 }
479
480 nvgpu_log(g, gpu_dbg_sgl,
481 " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
482 sgl->phys, sgl->dma, sgl->length);
483
484 /*
485 * When there's no more SGL ents for the Linux SGL we are
486 * done. Don't bother making any more SGL ents for the nvgpu
487 * SGL.
488 */
489 linux_sgl = sg_next(linux_sgl);
490 if (!linux_sgl)
491 break;
492
493 next = nvgpu_kzalloc(g, sizeof(*sgl));
494 if (!next) {
495 nvgpu_mem_sgl_free(g, head);
496 return NULL;
497 }
498
499 sgl->next = next;
500 sgl = next;
501 }
502 481
503 nvgpu_log(g, gpu_dbg_sgl, "Done!"); 482 return nvgpu_sgt;
504 return head;
505} 483}
506 484
507struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g, 485struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
508 struct nvgpu_mem *mem) 486 struct nvgpu_mem *mem)
509{ 487{
510 return nvgpu_mem_sgl_create(g, mem->priv.sgt); 488 return nvgpu_linux_sgt_create(g, mem->priv.sgt);
511} 489}
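
Note: for orientation, a minimal sketch of how a caller might use the Linux-backed SGT created above. It relies only on functions added by this patch (nvgpu_sgt_create_from_mem(), the nvgpu_sgt_get_*() accessors, nvgpu_sgt_free()); the wrapper function itself is hypothetical and not part of the change.

/*
 * Hypothetical helper, for illustration only: walk each chunk of a DMA
 * allocation through the new opaque-SGL interface and log it.
 */
static void sketch_log_chunks(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem);
	void *sgl;

	if (!sgt)
		return;

	/* Each sgl node is opaque; only sgt->ops knows its real type. */
	for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl))
		nvgpu_log(g, gpu_dbg_sgl, "  phys: 0x%-12llx len: 0x%llx",
			  nvgpu_sgt_get_phys(sgt, sgl),
			  nvgpu_sgt_get_length(sgt, sgl));

	/* Frees only the nvgpu_sgt wrapper, not the underlying Linux SGL. */
	nvgpu_sgt_free(sgt, g);
}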
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 4a4429dc..2e29f0f7 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -69,19 +69,20 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
69 69
70 if (aperture == APERTURE_VIDMEM) { 70 if (aperture == APERTURE_VIDMEM) {
71 struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); 71 struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
72 struct nvgpu_mem_sgl *sgl_vid = alloc->sgl; 72 struct nvgpu_sgt *sgt = &alloc->sgt;
73 void *sgl_vid = sgt->sgl;
73 74
74 while (sgl_vid) { 75 while (sgl_vid) {
75 chunk_align = 1ULL << 76 chunk_align = 1ULL <<
76 __ffs(nvgpu_mem_sgl_phys(sgl_vid) | 77 __ffs(nvgpu_sgt_get_phys(sgt, sgl_vid)) |
77 nvgpu_mem_sgl_length(sgl_vid)); 78 nvgpu_sgt_get_length(sgt, sgl_vid);
78 79
79 if (align) 80 if (align)
80 align = min(align, chunk_align); 81 align = min(align, chunk_align);
81 else 82 else
82 align = chunk_align; 83 align = chunk_align;
83 84
84 sgl_vid = nvgpu_mem_sgl_next(sgl_vid); 85 sgl_vid = nvgpu_sgt_get_next(sgt, sgl_vid);
85 } 86 }
86 87
87 return align; 88 return align;
@@ -242,7 +243,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
242 struct nvgpu_vm_area *vm_area = NULL; 243 struct nvgpu_vm_area *vm_area = NULL;
243 u32 ctag_offset; 244 u32 ctag_offset;
244 enum nvgpu_aperture aperture; 245 enum nvgpu_aperture aperture;
245 struct nvgpu_mem_sgl *nvgpu_sgl; 246 struct nvgpu_sgt *nvgpu_sgt;
246 247
247 /* 248 /*
248 * The kind used as part of the key for map caching. HW may 249 * The kind used as part of the key for map caching. HW may
@@ -399,12 +400,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
399 ctag_offset += buffer_offset >> 400 ctag_offset += buffer_offset >>
400 ilog2(g->ops.fb.compression_page_size(g)); 401 ilog2(g->ops.fb.compression_page_size(g));
401 402
402 nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt); 403 nvgpu_sgt = nvgpu_linux_sgt_create(g, bfr.sgt);
403 404
404 /* update gmmu ptes */ 405 /* update gmmu ptes */
405 map_offset = g->ops.mm.gmmu_map(vm, 406 map_offset = g->ops.mm.gmmu_map(vm,
406 map_offset, 407 map_offset,
407 nvgpu_sgl, 408 nvgpu_sgt,
408 buffer_offset, /* sg offset */ 409 buffer_offset, /* sg offset */
409 mapping_size, 410 mapping_size,
410 bfr.pgsz_idx, 411 bfr.pgsz_idx,
@@ -419,7 +420,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
419 if (!map_offset) 420 if (!map_offset)
420 goto clean_up; 421 goto clean_up;
421 422
422 nvgpu_mem_sgl_free(g, nvgpu_sgl); 423 nvgpu_sgt_free(nvgpu_sgt, g);
423 424
424 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); 425 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
425 if (!mapped_buffer) { 426 if (!mapped_buffer) {
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 41f5acdd..66bce8f0 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -65,14 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
65 struct gk20a *g = gk20a_from_vm(vm); 65 struct gk20a *g = gk20a_from_vm(vm);
66 u64 vaddr; 66 u64 vaddr;
67 67
68 struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem); 68 struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem);
69 69
70 if (!sgl) 70 if (!sgt)
71 return -ENOMEM; 71 return -ENOMEM;
72 72
73 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 73 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
74 vaddr = g->ops.mm.gmmu_map(vm, addr, 74 vaddr = g->ops.mm.gmmu_map(vm, addr,
75 sgl, /* sg list */ 75 sgt, /* sg list */
76 0, /* sg offset */ 76 0, /* sg offset */
77 size, 77 size,
78 gmmu_page_size_kernel, 78 gmmu_page_size_kernel,
@@ -86,7 +86,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
86 aperture); 86 aperture);
87 nvgpu_mutex_release(&vm->update_gmmu_lock); 87 nvgpu_mutex_release(&vm->update_gmmu_lock);
88 88
89 nvgpu_mem_sgl_free(g, sgl); 89 nvgpu_sgt_free(sgt, g);
90 90
91 if (!vaddr) { 91 if (!vaddr) {
92 nvgpu_err(g, "failed to map buffer!"); 92 nvgpu_err(g, "failed to map buffer!");
@@ -464,7 +464,7 @@ static int __set_pd_level(struct vm_gk20a *vm,
464 * VIDMEM version of the update_ptes logic. 464 * VIDMEM version of the update_ptes logic.
465 */ 465 */
466static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, 466static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
467 struct nvgpu_mem_sgl *sgl, 467 struct nvgpu_sgt *sgt,
468 u64 space_to_skip, 468 u64 space_to_skip,
469 u64 virt_addr, 469 u64 virt_addr,
470 u64 length, 470 u64 length,
@@ -472,8 +472,9 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
472{ 472{
473 u64 phys_addr, chunk_length; 473 u64 phys_addr, chunk_length;
474 int err = 0; 474 int err = 0;
475 void *sgl;
475 476
476 if (!sgl) { 477 if (!sgt) {
477 /* 478 /*
478 * This is considered an unmap. Just pass in 0 as the physical 479 * This is considered an unmap. Just pass in 0 as the physical
479 * address for the entire GPU range. 480 * address for the entire GPU range.
@@ -490,16 +491,17 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
490 * Otherwise iterate across all the chunks in this allocation and 491 * Otherwise iterate across all the chunks in this allocation and
491 * map them. 492 * map them.
492 */ 493 */
494 sgl = sgt->sgl;
493 while (sgl) { 495 while (sgl) {
494 if (space_to_skip && 496 if (space_to_skip &&
495 space_to_skip >= nvgpu_mem_sgl_length(sgl)) { 497 space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
496 space_to_skip -= nvgpu_mem_sgl_length(sgl); 498 space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
497 sgl = nvgpu_mem_sgl_next(sgl); 499 sgl = nvgpu_sgt_get_next(sgt, sgl);
498 continue; 500 continue;
499 } 501 }
500 502
501 phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; 503 phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
502 chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) - 504 chunk_length = min(length, (nvgpu_sgt_get_length(sgt, sgl) -
503 space_to_skip)); 505 space_to_skip));
504 506
505 err = __set_pd_level(vm, &vm->pdb, 507 err = __set_pd_level(vm, &vm->pdb,
@@ -518,27 +520,27 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
518 */ 520 */
519 virt_addr += chunk_length; 521 virt_addr += chunk_length;
520 length -= chunk_length; 522 length -= chunk_length;
523 sgl = nvgpu_sgt_get_next(sgt, sgl);
521 524
522 if (length == 0) 525 if (length == 0)
523 break; 526 break;
524
525 sgl = nvgpu_mem_sgl_next(sgl);
526 } 527 }
527 528
528 return err; 529 return err;
529} 530}
530 531
531static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, 532static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
532 struct nvgpu_mem_sgl *sgl, 533 struct nvgpu_sgt *sgt,
533 u64 space_to_skip, 534 u64 space_to_skip,
534 u64 virt_addr, 535 u64 virt_addr,
535 u64 length, 536 u64 length,
536 struct nvgpu_gmmu_attrs *attrs) 537 struct nvgpu_gmmu_attrs *attrs)
537{ 538{
538 int err;
539 struct gk20a *g = gk20a_from_vm(vm); 539 struct gk20a *g = gk20a_from_vm(vm);
540 void *sgl;
541 int err;
540 542
541 if (!sgl) { 543 if (!sgt) {
542 /* 544 /*
543 * This is considered an unmap. Just pass in 0 as the physical 545 * This is considered an unmap. Just pass in 0 as the physical
544 * address for the entire GPU range. 546 * address for the entire GPU range.
@@ -559,8 +561,10 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
559 * mapping is simple since the "physical" address is actually a virtual 561 * mapping is simple since the "physical" address is actually a virtual
560 * IO address and will be contiguous. 562 * IO address and will be contiguous.
561 */ 563 */
564 sgl = sgt->sgl;
565
562 if (!g->mm.bypass_smmu) { 566 if (!g->mm.bypass_smmu) {
563 u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs); 567 u64 io_addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgl, attrs);
564 568
565 io_addr += space_to_skip; 569 io_addr += space_to_skip;
566 570
@@ -586,15 +590,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
586 * Cut out sgl ents for space_to_skip. 590 * Cut out sgl ents for space_to_skip.
587 */ 591 */
588 if (space_to_skip && 592 if (space_to_skip &&
589 space_to_skip >= nvgpu_mem_sgl_length(sgl)) { 593 space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
590 space_to_skip -= nvgpu_mem_sgl_length(sgl); 594 space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
591 sgl = nvgpu_mem_sgl_next(sgl); 595 sgl = nvgpu_sgt_get_next(sgt, sgl);
592 continue; 596 continue;
593 } 597 }
594 598
595 phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; 599 phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
596 chunk_length = min(length, 600 chunk_length = min(length,
597 nvgpu_mem_sgl_length(sgl) - space_to_skip); 601 nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
598 602
599 err = __set_pd_level(vm, &vm->pdb, 603 err = __set_pd_level(vm, &vm->pdb,
600 0, 604 0,
@@ -606,7 +610,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
606 space_to_skip = 0; 610 space_to_skip = 0;
607 virt_addr += chunk_length; 611 virt_addr += chunk_length;
608 length -= chunk_length; 612 length -= chunk_length;
609 sgl = nvgpu_mem_sgl_next(sgl); 613 sgl = nvgpu_sgt_get_next(sgt, sgl);
610 614
611 if (length == 0) 615 if (length == 0)
612 break; 616 break;
@@ -631,7 +635,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
631 * case of SMMU usage. 635 * case of SMMU usage.
632 */ 636 */
633static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, 637static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
634 struct nvgpu_mem_sgl *sgl, 638 struct nvgpu_sgt *sgt,
635 u64 space_to_skip, 639 u64 space_to_skip,
636 u64 virt_addr, 640 u64 virt_addr,
637 u64 length, 641 u64 length,
@@ -669,10 +673,10 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
669 "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " 673 "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
670 "kind=%#02x APT=%-6s %c%c%c%c%c", 674 "kind=%#02x APT=%-6s %c%c%c%c%c",
671 vm->name, 675 vm->name,
672 sgl ? "MAP" : "UNMAP", 676 sgt ? "MAP" : "UNMAP",
673 virt_addr, 677 virt_addr,
674 length, 678 length,
675 sgl ? nvgpu_mem_sgl_phys(sgl) : 0, 679 sgt ? nvgpu_sgt_get_phys(sgt, sgt->sgl) : 0,
676 space_to_skip, 680 space_to_skip,
677 page_size >> 10, 681 page_size >> 10,
678 nvgpu_gmmu_perm_str(attrs->rw_flag), 682 nvgpu_gmmu_perm_str(attrs->rw_flag),
@@ -690,14 +694,14 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
690 */ 694 */
691 if (attrs->aperture == APERTURE_VIDMEM) 695 if (attrs->aperture == APERTURE_VIDMEM)
692 err = __nvgpu_gmmu_update_page_table_vidmem(vm, 696 err = __nvgpu_gmmu_update_page_table_vidmem(vm,
693 sgl, 697 sgt,
694 space_to_skip, 698 space_to_skip,
695 virt_addr, 699 virt_addr,
696 length, 700 length,
697 attrs); 701 attrs);
698 else 702 else
699 err = __nvgpu_gmmu_update_page_table_sysmem(vm, 703 err = __nvgpu_gmmu_update_page_table_sysmem(vm,
700 sgl, 704 sgt,
701 space_to_skip, 705 space_to_skip,
702 virt_addr, 706 virt_addr,
703 length, 707 length,
@@ -706,7 +710,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
706 unmap_gmmu_pages(g, &vm->pdb); 710 unmap_gmmu_pages(g, &vm->pdb);
707 nvgpu_smp_mb(); 711 nvgpu_smp_mb();
708 712
709 __gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP"); 713 __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");
710 714
711 return err; 715 return err;
712} 716}
@@ -725,7 +729,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
725 */ 729 */
726u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, 730u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
727 u64 vaddr, 731 u64 vaddr,
728 struct nvgpu_mem_sgl *sgl, 732 struct nvgpu_sgt *sgt,
729 u64 buffer_offset, 733 u64 buffer_offset,
730 u64 size, 734 u64 size,
731 int pgsz_idx, 735 int pgsz_idx,
@@ -774,7 +778,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
774 allocated = true; 778 allocated = true;
775 } 779 }
776 780
777 err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset, 781 err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset,
778 vaddr, size, &attrs); 782 vaddr, size, &attrs);
779 if (err) { 783 if (err) {
780 nvgpu_err(g, "failed to update ptes on map"); 784 nvgpu_err(g, "failed to update ptes on map");
@@ -787,6 +791,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
787 batch->need_tlb_invalidate = true; 791 batch->need_tlb_invalidate = true;
788 792
789 return vaddr; 793 return vaddr;
794
790fail_validate: 795fail_validate:
791 if (allocated) 796 if (allocated)
792 __nvgpu_vm_free_va(vm, vaddr, pgsz_idx); 797 __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
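
Note: both the vidmem and sysmem page-table update paths above walk the SGT the same way: consume sgl entries until space_to_skip is exhausted, then map the remainder chunk by chunk. Below is a condensed, illustrative version of that loop; sketch_map_chunks() is an invented name, __set_pd_level() is reduced to a comment, and error handling is omitted.

/*
 * Illustrative only: the common skip-then-map pattern shared by the
 * vidmem and sysmem update paths in this patch.
 */
static int sketch_map_chunks(struct vm_gk20a *vm, struct nvgpu_sgt *sgt,
			     u64 space_to_skip, u64 virt_addr, u64 length,
			     struct nvgpu_gmmu_attrs *attrs)
{
	void *sgl = sgt->sgl;

	while (sgl && length) {
		u64 sgl_length = nvgpu_sgt_get_length(sgt, sgl);
		u64 phys_addr, chunk_length;

		/* Chunks that lie entirely before the mapping are skipped. */
		if (space_to_skip && space_to_skip >= sgl_length) {
			space_to_skip -= sgl_length;
			sgl = nvgpu_sgt_get_next(sgt, sgl);
			continue;
		}

		phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
		chunk_length = min(length, sgl_length - space_to_skip);

		/* __set_pd_level(vm, &vm->pdb, 0, phys_addr, virt_addr,
		 *		  chunk_length, attrs); */

		space_to_skip = 0;
		virt_addr += chunk_length;
		length -= chunk_length;
		sgl = nvgpu_sgt_get_next(sgt, sgl);
	}

	return 0;
}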
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 7296c673..6decec24 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -19,55 +19,34 @@
19 19
20#include "gk20a/gk20a.h" 20#include "gk20a/gk20a.h"
21 21
22struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl) 22void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
23{ 23{
24 return sgl->next; 24 return sgt->ops->sgl_next(sgl);
25} 25}
26 26
27u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl) 27u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl)
28{ 28{
29 return sgl->phys; 29 return sgt->ops->sgl_phys(sgl);
30} 30}
31 31
32u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl) 32u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl)
33{ 33{
34 return sgl->dma; 34 return sgt->ops->sgl_dma(sgl);
35} 35}
36 36
37u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl) 37u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl)
38{ 38{
39 return sgl->length; 39 return sgt->ops->sgl_length(sgl);
40} 40}
41 41
42/* 42u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl,
43 * This builds a GPU address for the %sgl based on whether an IOMMU is present
44 * or not. It also handles turning the physical address into the true GPU
45 * physical address that should be programmed into the page tables.
46 */
47u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
48 struct nvgpu_gmmu_attrs *attrs) 43 struct nvgpu_gmmu_attrs *attrs)
49{ 44{
50 if (nvgpu_mem_sgl_dma(sgl) == 0) 45 return sgt->ops->sgl_gpu_addr(g, sgl, attrs);
51 return g->ops.mm.gpu_phys_addr(g, attrs,
52 nvgpu_mem_sgl_phys(sgl));
53
54 if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE)
55 return 0;
56
57 return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl));
58} 46}
59 47
60void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl) 48void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g)
61{ 49{
62 struct nvgpu_mem_sgl *next; 50 if (sgt && sgt->ops->sgt_free)
63 51 sgt->ops->sgt_free(g, sgt);
64 /*
65 * Free each of the elements. We expect each element to have been
66 * nvgpu_k[mz]alloc()ed.
67 */
68 while (sgl) {
69 next = nvgpu_mem_sgl_next(sgl);
70 nvgpu_kfree(g, sgl);
71 sgl = next;
72 }
73} 52}
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 6d92b457..9c35f528 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -143,20 +143,93 @@ static void nvgpu_page_release_co(struct nvgpu_allocator *a,
143 nvgpu_alloc_release_carveout(&va->source_allocator, co); 143 nvgpu_alloc_release_carveout(&va->source_allocator, co);
144} 144}
145 145
146static void *nvgpu_page_alloc_sgl_next(void *sgl)
147{
148 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
149
150 return nvgpu_sgl->next;
151}
152
153static u64 nvgpu_page_alloc_sgl_phys(void *sgl)
154{
155 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
156
157 return nvgpu_sgl->phys;
158}
159
160static u64 nvgpu_page_alloc_sgl_dma(void *sgl)
161{
162 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
163
164 return nvgpu_sgl->dma;
165}
166
167static u64 nvgpu_page_alloc_sgl_length(void *sgl)
168{
169 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
170
171 return nvgpu_sgl->length;
172}
173
174static u64 nvgpu_page_alloc_sgl_gpu_addr(struct gk20a *g, void *sgl,
175 struct nvgpu_gmmu_attrs *attrs)
176{
177 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
178
179 return nvgpu_sgl->phys;
180}
181
182static void nvgpu_page_alloc_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
183{
184 /*
185 * No-op here. The free is handled by the page_alloc free() functions.
186 */
187}
188
189/*
190 * These implement the generic scatter gather ops for pages allocated
191 * by the page allocator. however, the primary aim for this, is of course,
192 * vidmem.
193 */
194static const struct nvgpu_sgt_ops page_alloc_sgl_ops = {
195 .sgl_next = nvgpu_page_alloc_sgl_next,
196 .sgl_phys = nvgpu_page_alloc_sgl_phys,
197 .sgl_dma = nvgpu_page_alloc_sgl_dma,
198 .sgl_length = nvgpu_page_alloc_sgl_length,
199 .sgl_gpu_addr = nvgpu_page_alloc_sgl_gpu_addr,
200 .sgt_free = nvgpu_page_alloc_sgt_free,
201};
202
203/*
204 * This actually frees the sgl memory. Used by the page_alloc free() functions.
205 */
206static void nvgpu_page_alloc_sgl_proper_free(struct gk20a *g,
207 struct nvgpu_mem_sgl *sgl)
208{
209 struct nvgpu_mem_sgl *next;
210
211 while (sgl) {
212 next = sgl->next;
213 nvgpu_kfree(g, sgl);
214 sgl = next;
215 }
216}
217
146static void __nvgpu_free_pages(struct nvgpu_page_allocator *a, 218static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
147 struct nvgpu_page_alloc *alloc, 219 struct nvgpu_page_alloc *alloc,
148 bool free_buddy_alloc) 220 bool free_buddy_alloc)
149{ 221{
150 struct nvgpu_mem_sgl *sgl = alloc->sgl; 222 struct nvgpu_mem_sgl *sgl = alloc->sgt.sgl;
151 223
152 if (free_buddy_alloc) { 224 if (free_buddy_alloc) {
153 while (sgl) { 225 while (sgl) {
154 nvgpu_free(&a->source_allocator, sgl->phys); 226 nvgpu_free(&a->source_allocator,
155 sgl = nvgpu_mem_sgl_next(sgl); 227 nvgpu_sgt_get_phys(&alloc->sgt, sgl));
228 sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
156 } 229 }
157 } 230 }
158 231
159 nvgpu_mem_sgl_free(a->owner->g, alloc->sgl); 232 nvgpu_page_alloc_sgl_proper_free(a->owner->g, sgl);
160 nvgpu_kmem_cache_free(a->alloc_cache, alloc); 233 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
161} 234}
162 235
@@ -306,7 +379,7 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a,
306 alloc->length = slab_page->slab_size; 379 alloc->length = slab_page->slab_size;
307 alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); 380 alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
308 381
309 sgl = alloc->sgl; 382 sgl = alloc->sgt.sgl;
310 sgl->phys = alloc->base; 383 sgl->phys = alloc->base;
311 sgl->dma = alloc->base; 384 sgl->dma = alloc->base;
312 sgl->length = alloc->length; 385 sgl->length = alloc->length;
@@ -338,13 +411,16 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
338 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); 411 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
339 goto fail; 412 goto fail;
340 } 413 }
414
415 alloc->sgt.ops = &page_alloc_sgl_ops;
416
341 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); 417 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
342 if (!sgl) { 418 if (!sgl) {
343 palloc_dbg(a, "OOM: could not alloc sgl struct!\n"); 419 palloc_dbg(a, "OOM: could not alloc sgl struct!\n");
344 goto fail; 420 goto fail;
345 } 421 }
346 422
347 alloc->sgl = sgl; 423 alloc->sgt.sgl = sgl;
348 err = __do_slab_alloc(a, slab, alloc); 424 err = __do_slab_alloc(a, slab, alloc);
349 if (err) 425 if (err)
350 goto fail; 426 goto fail;
@@ -432,6 +508,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
432 memset(alloc, 0, sizeof(*alloc)); 508 memset(alloc, 0, sizeof(*alloc));
433 509
434 alloc->length = pages << a->page_shift; 510 alloc->length = pages << a->page_shift;
511 alloc->sgt.ops = &page_alloc_sgl_ops;
435 512
436 while (pages) { 513 while (pages) {
437 u64 chunk_addr = 0; 514 u64 chunk_addr = 0;
@@ -495,7 +572,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
495 if (prev_sgl) 572 if (prev_sgl)
496 prev_sgl->next = sgl; 573 prev_sgl->next = sgl;
497 else 574 else
498 alloc->sgl = sgl; 575 alloc->sgt.sgl = sgl;
499 576
500 prev_sgl = sgl; 577 prev_sgl = sgl;
501 578
@@ -503,12 +580,12 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
503 } 580 }
504 581
505 alloc->nr_chunks = i; 582 alloc->nr_chunks = i;
506 alloc->base = alloc->sgl->phys; 583 alloc->base = ((struct nvgpu_mem_sgl *)alloc->sgt.sgl)->phys;
507 584
508 return alloc; 585 return alloc;
509 586
510fail_cleanup: 587fail_cleanup:
511 sgl = alloc->sgl; 588 sgl = alloc->sgt.sgl;
512 while (sgl) { 589 while (sgl) {
513 struct nvgpu_mem_sgl *next = sgl->next; 590 struct nvgpu_mem_sgl *next = sgl->next;
514 591
@@ -542,13 +619,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
542 619
543 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", 620 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
544 pages << a->page_shift, pages, alloc->base); 621 pages << a->page_shift, pages, alloc->base);
545 sgl = alloc->sgl; 622 sgl = alloc->sgt.sgl;
546 while (sgl) { 623 while (sgl) {
547 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", 624 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
548 i++, 625 i++,
549 nvgpu_mem_sgl_phys(sgl), 626 nvgpu_sgt_get_phys(&alloc->sgt, sgl),
550 nvgpu_mem_sgl_length(sgl)); 627 nvgpu_sgt_get_length(&alloc->sgt, sgl));
551 sgl = sgl->next; 628 sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
552 } 629 }
553 palloc_dbg(a, "Alloc done\n"); 630 palloc_dbg(a, "Alloc done\n");
554 631
@@ -655,6 +732,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
655 if (!alloc || !sgl) 732 if (!alloc || !sgl)
656 goto fail; 733 goto fail;
657 734
735 alloc->sgt.ops = &page_alloc_sgl_ops;
658 alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); 736 alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
659 if (!alloc->base) { 737 if (!alloc->base) {
660 WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base); 738 WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base);
@@ -663,7 +741,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
663 741
664 alloc->nr_chunks = 1; 742 alloc->nr_chunks = 1;
665 alloc->length = length; 743 alloc->length = length;
666 alloc->sgl = sgl; 744 alloc->sgt.sgl = sgl;
667 745
668 sgl->phys = alloc->base; 746 sgl->phys = alloc->base;
669 sgl->dma = alloc->base; 747 sgl->dma = alloc->base;
@@ -708,13 +786,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
708 786
709 palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", 787 palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
710 alloc->base, aligned_len, pages); 788 alloc->base, aligned_len, pages);
711 sgl = alloc->sgl; 789 sgl = alloc->sgt.sgl;
712 while (sgl) { 790 while (sgl) {
713 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", 791 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
714 i++, 792 i++,
715 nvgpu_mem_sgl_phys(sgl), 793 nvgpu_sgt_get_phys(&alloc->sgt, sgl),
716 nvgpu_mem_sgl_length(sgl)); 794 nvgpu_sgt_get_length(&alloc->sgt, sgl));
717 sgl = sgl->next; 795 sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
718 } 796 }
719 797
720 a->nr_fixed_allocs++; 798 a->nr_fixed_allocs++;
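
Note: the fixed-allocation path above shows the allocator side of the new interface in miniature: it hand-builds nvgpu_mem_sgl chunks, points alloc->sgt.sgl at the head, and installs page_alloc_sgl_ops so generic code can walk the result. A hypothetical single-chunk version, using only fields and ops visible in this patch (the function name is invented):

/*
 * Hypothetical sketch: attach a single contiguous chunk to a
 * nvgpu_page_alloc's embedded SGT, mirroring the fixed-alloc path.
 */
static int sketch_attach_one_chunk(struct gk20a *g,
				   struct nvgpu_page_alloc *alloc,
				   u64 base, u64 length)
{
	struct nvgpu_mem_sgl *sgl = nvgpu_kzalloc(g, sizeof(*sgl));

	if (!sgl)
		return -ENOMEM;

	sgl->phys = base;
	sgl->dma = base;
	sgl->length = length;
	sgl->next = NULL;

	alloc->base = base;
	alloc->length = length;
	alloc->nr_chunks = 1;
	alloc->sgt.ops = &page_alloc_sgl_ops;
	alloc->sgt.sgl = sgl;

	return 0;
}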
diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c
index bb7d930e..ae9c9b1f 100644
--- a/drivers/gpu/nvgpu/common/pramin.c
+++ b/drivers/gpu/nvgpu/common/pramin.c
@@ -84,24 +84,23 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
84 u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) 84 u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
85{ 85{
86 struct nvgpu_page_alloc *alloc = NULL; 86 struct nvgpu_page_alloc *alloc = NULL;
87 struct nvgpu_mem_sgl *sgl; 87 struct nvgpu_sgt *sgt;
88 void *sgl;
88 u32 byteoff, start_reg, until_end, n; 89 u32 byteoff, start_reg, until_end, n;
89 90
90 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); 91 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
91 sgl = alloc->sgl; 92 sgt = &alloc->sgt;
92 while (sgl) { 93 for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl)) {
93 if (offset >= nvgpu_mem_sgl_length(sgl)) { 94 if (offset >= nvgpu_sgt_get_length(sgt, sgl))
94 offset -= nvgpu_mem_sgl_length(sgl); 95 offset -= nvgpu_sgt_get_length(sgt, sgl);
95 sgl = sgl->next; 96 else
96 } else {
97 break; 97 break;
98 }
99 } 98 }
100 99
101 while (size) { 100 while (size) {
102 u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl); 101 u32 sgl_len = (u32)nvgpu_sgt_get_length(sgt, sgl);
103 102
104 byteoff = g->ops.pramin.enter(g, mem, sgl, 103 byteoff = g->ops.pramin.enter(g, mem, sgt, sgl,
105 offset / sizeof(u32)); 104 offset / sizeof(u32));
106 start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); 105 start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32));
107 until_end = SZ_1M - (byteoff & (SZ_1M - 1)); 106 until_end = SZ_1M - (byteoff & (SZ_1M - 1));
@@ -117,7 +116,7 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
117 size -= n; 116 size -= n;
118 117
119 if (n == (sgl_len - offset)) { 118 if (n == (sgl_len - offset)) {
120 sgl = nvgpu_mem_sgl_next(sgl); 119 sgl = nvgpu_sgt_get_next(sgt, sgl);
121 offset = 0; 120 offset = 0;
122 } else { 121 } else {
123 offset += n; 122 offset += n;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 355228db..13c62691 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -34,7 +34,7 @@ struct gk20a_debug_output;
34struct nvgpu_clk_pll_debug_data; 34struct nvgpu_clk_pll_debug_data;
35struct nvgpu_nvhost_dev; 35struct nvgpu_nvhost_dev;
36struct nvgpu_cpu_time_correlation_sample; 36struct nvgpu_cpu_time_correlation_sample;
37struct nvgpu_mem_sgl; 37struct nvgpu_mem_sgt;
38 38
39#include <nvgpu/lock.h> 39#include <nvgpu/lock.h>
40#include <nvgpu/thread.h> 40#include <nvgpu/thread.h>
@@ -700,7 +700,7 @@ struct gpu_ops {
700 bool (*support_sparse)(struct gk20a *g); 700 bool (*support_sparse)(struct gk20a *g);
701 u64 (*gmmu_map)(struct vm_gk20a *vm, 701 u64 (*gmmu_map)(struct vm_gk20a *vm,
702 u64 map_offset, 702 u64 map_offset,
703 struct nvgpu_mem_sgl *sgl, 703 struct nvgpu_sgt *sgt,
704 u64 buffer_offset, 704 u64 buffer_offset,
705 u64 size, 705 u64 size,
706 int pgsz_idx, 706 int pgsz_idx,
@@ -760,9 +760,9 @@ struct gpu_ops {
760 size_t size); 760 size_t size);
761 struct { 761 struct {
762 u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem, 762 u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem,
763 struct nvgpu_mem_sgl *sgl, u32 w); 763 struct nvgpu_sgt *sgt, void *sgl, u32 w);
764 void (*exit)(struct gk20a *g, struct nvgpu_mem *mem, 764 void (*exit)(struct gk20a *g, struct nvgpu_mem *mem,
765 struct nvgpu_mem_sgl *sgl); 765 void *sgl);
766 u32 (*data032_r)(u32 i); 766 u32 (*data032_r)(u32 i);
767 } pramin; 767 } pramin;
768 struct { 768 struct {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index cd34e769..0e0326dd 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1151,7 +1151,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1151 struct gk20a_fence *gk20a_fence_out = NULL; 1151 struct gk20a_fence *gk20a_fence_out = NULL;
1152 struct gk20a_fence *gk20a_last_fence = NULL; 1152 struct gk20a_fence *gk20a_last_fence = NULL;
1153 struct nvgpu_page_alloc *alloc = NULL; 1153 struct nvgpu_page_alloc *alloc = NULL;
1154 struct nvgpu_mem_sgl *sgl = NULL; 1154 struct nvgpu_sgt *sgt = NULL;
1155 void *sgl = NULL;
1155 int err = 0; 1156 int err = 0;
1156 1157
1157 if (g->mm.vidmem.ce_ctx_id == (u32)~0) 1158 if (g->mm.vidmem.ce_ctx_id == (u32)~0)
@@ -1159,7 +1160,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1159 1160
1160 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); 1161 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
1161 1162
1162 sgl = alloc->sgl; 1163 sgt = &alloc->sgt;
1164 sgl = sgt->sgl;
1163 while (sgl) { 1165 while (sgl) {
1164 if (gk20a_last_fence) 1166 if (gk20a_last_fence)
1165 gk20a_fence_put(gk20a_last_fence); 1167 gk20a_fence_put(gk20a_last_fence);
@@ -1167,8 +1169,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1167 err = gk20a_ce_execute_ops(g, 1169 err = gk20a_ce_execute_ops(g,
1168 g->mm.vidmem.ce_ctx_id, 1170 g->mm.vidmem.ce_ctx_id,
1169 0, 1171 0,
1170 nvgpu_mem_sgl_phys(sgl), 1172 nvgpu_sgt_get_phys(sgt, sgl),
1171 nvgpu_mem_sgl_length(sgl), 1173 nvgpu_sgt_get_length(sgt, sgl),
1172 0x00000000, 1174 0x00000000,
1173 NVGPU_CE_DST_LOCATION_LOCAL_FB, 1175 NVGPU_CE_DST_LOCATION_LOCAL_FB,
1174 NVGPU_CE_MEMSET, 1176 NVGPU_CE_MEMSET,
@@ -1183,7 +1185,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1183 } 1185 }
1184 1186
1185 gk20a_last_fence = gk20a_fence_out; 1187 gk20a_last_fence = gk20a_fence_out;
1186 sgl = nvgpu_mem_sgl_next(sgl); 1188 sgl = nvgpu_sgt_get_next(sgt, sgl);
1187 } 1189 }
1188 1190
1189 if (gk20a_last_fence) { 1191 if (gk20a_last_fence) {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 2fdc1729..9c5e0fae 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -361,7 +361,7 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem)
361 361
362u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, 362u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
363 u64 map_offset, 363 u64 map_offset,
364 struct nvgpu_mem_sgl *sgl, 364 struct nvgpu_sgt *sgt,
365 u64 buffer_offset, 365 u64 buffer_offset,
366 u64 size, 366 u64 size,
367 int pgsz_idx, 367 int pgsz_idx,
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
index 8a34a63c..aaba4ffc 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
@@ -26,9 +26,9 @@
26 26
27/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ 27/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
28u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, 28u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
29 struct nvgpu_mem_sgl *sgl, u32 w) 29 struct nvgpu_sgt *sgt, void *sgl, u32 w)
30{ 30{
31 u64 bufbase = nvgpu_mem_sgl_phys(sgl); 31 u64 bufbase = nvgpu_sgt_get_phys(sgt, sgl);
32 u64 addr = bufbase + w * sizeof(u32); 32 u64 addr = bufbase + w * sizeof(u32);
33 u32 hi = (u32)((addr & ~(u64)0xfffff) 33 u32 hi = (u32)((addr & ~(u64)0xfffff)
34 >> bus_bar0_window_target_bar0_window_base_shift_v()); 34 >> bus_bar0_window_target_bar0_window_base_shift_v());
@@ -41,8 +41,8 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
41 gk20a_dbg(gpu_dbg_mem, 41 gk20a_dbg(gpu_dbg_mem,
42 "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)", 42 "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)",
43 hi, lo, mem, sgl, bufbase, 43 hi, lo, mem, sgl, bufbase,
44 bufbase + nvgpu_mem_sgl_phys(sgl), 44 bufbase + nvgpu_sgt_get_phys(sgt, sgl),
45 nvgpu_mem_sgl_length(sgl)); 45 nvgpu_sgt_get_length(sgt, sgl));
46 46
47 WARN_ON(!bufbase); 47 WARN_ON(!bufbase);
48 48
@@ -58,7 +58,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
58} 58}
59 59
60void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, 60void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem,
61 struct nvgpu_mem_sgl *sgl) 61 void *sgl)
62{ 62{
63 gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, sgl); 63 gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, sgl);
64 64
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
index fc5ba919..29e76978 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
@@ -22,7 +22,7 @@ struct nvgpu_mem;
22struct nvgpu_mem_sgl; 22struct nvgpu_mem_sgl;
23 23
24u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, 24u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
25 struct nvgpu_mem_sgl *sgl, u32 w); 25 struct nvgpu_sgt *sgt, void *sgl, u32 w);
26void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, 26void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem,
27 struct nvgpu_mem_sgl *sgl); 27 void *sgl);
28#endif 28#endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h
index f96c2801..517d834c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h
@@ -20,6 +20,7 @@
20struct page; 20struct page;
21struct sg_table; 21struct sg_table;
22struct scatterlist; 22struct scatterlist;
23struct nvgpu_sgt;
23 24
24struct gk20a; 25struct gk20a;
25struct nvgpu_mem; 26struct nvgpu_mem;
@@ -32,9 +33,11 @@ struct nvgpu_mem_priv {
32}; 33};
33 34
34u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl); 35u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl);
35struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g, 36struct nvgpu_sgt *nvgpu_mem_linux_sgt_create(struct gk20a *g,
37 struct sg_table *sgt);
38void nvgpu_mem_linux_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt);
39struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g,
36 struct sg_table *sgt); 40 struct sg_table *sgt);
37
38/** 41/**
39 * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages. 42 * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages.
40 * 43 *
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 7d19cf81..beffbfe8 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -46,12 +46,41 @@ enum nvgpu_aperture {
46 APERTURE_VIDMEM 46 APERTURE_VIDMEM
47}; 47};
48 48
49struct nvgpu_sgt_ops {
50 void *(*sgl_next)(void *sgl);
51 u64 (*sgl_phys)(void *sgl);
52 u64 (*sgl_dma)(void *sgl);
53 u64 (*sgl_length)(void *sgl);
54 u64 (*sgl_gpu_addr)(struct gk20a *g, void *sgl,
55 struct nvgpu_gmmu_attrs *attrs);
56 /*
57 * Note: this operates on the whole SGT not a specific SGL entry.
58 */
59 void (*sgt_free)(struct gk20a *g, struct nvgpu_sgt *sgt);
60};
61
62/*
63 * Scatter gather table: this is a list of scatter list entries and the ops for
64 * interacting with those entries.
65 */
66struct nvgpu_sgt {
67 /*
68 * Ops for interacting with the underlying scatter gather list entries.
69 */
70 const struct nvgpu_sgt_ops *ops;
71
72 /*
73 * The first node in the scatter gather list.
74 */
75 void *sgl;
76};
77
49/* 78/*
50 * This struct holds the necessary information for describing a struct 79 * This struct holds the necessary information for describing a struct
51 * nvgpu_mem's scatter gather list. 80 * nvgpu_mem's scatter gather list.
52 * 81 *
53 * These are created in a platform dependent way. As a result the function 82 * Not all nvgpu_sgt's use this particular implementation. Nor is a given OS
54 * definition for allocating these lives in the <nvgpu/_OS_/nvgpu_mem.h> file. 83 * required to use this at all.
55 */ 84 */
56struct nvgpu_mem_sgl { 85struct nvgpu_mem_sgl {
57 /* 86 /*
@@ -164,6 +193,32 @@ static inline bool nvgpu_mem_is_valid(struct nvgpu_mem *mem)
164 193
165} 194}
166 195
196/*
197 * Create a nvgpu_sgt of the default implementation
198 */
199struct nvgpu_sgt *nvgpu_sgt_create(struct gk20a *g);
200
201/**
202 * nvgpu_mem_sgt_create_from_mem - Create a scatter list from an nvgpu_mem.
203 *
204 * @g - The GPU.
205 * @mem - The source memory allocation to use.
206 *
207 * Create a scatter gather table from the passed @mem struct. This list lets the
208 * calling code iterate across each chunk of a DMA allocation for when that DMA
209 * allocation is not completely contiguous.
210 */
211struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
212 struct nvgpu_mem *mem);
213
214void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl);
215u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl);
216u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl);
217u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl);
218u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl,
219 struct nvgpu_gmmu_attrs *attrs);
220void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g);
221
167/** 222/**
168 * nvgpu_mem_create_from_mem - Create a new nvgpu_mem struct from an old one. 223 * nvgpu_mem_create_from_mem - Create a new nvgpu_mem struct from an old one.
169 * 224 *
@@ -200,27 +255,6 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
200 struct nvgpu_mem *dest, struct nvgpu_mem *src, 255 struct nvgpu_mem *dest, struct nvgpu_mem *src,
201 int start_page, int nr_pages); 256 int start_page, int nr_pages);
202 257
203/**
204 * nvgpu_mem_sgl_create_from_mem - Create a scatter list from an nvgpu_mem.
205 *
206 * @g - The GPU.
207 * @mem - The source memory allocation to use.
208 *
209 * Create a scatter gather list from the passed @mem struct. This list lets the
210 * calling code iterate across each chunk of a DMA allocation for when that DMA
211 * allocation is not completely contiguous.
212 */
213struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
214 struct nvgpu_mem *mem);
215void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl);
216
217struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl);
218u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl);
219u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl);
220u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl);
221u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
222 struct nvgpu_gmmu_attrs *attrs);
223
224/* 258/*
225 * Buffer accessors - wrap between begin() and end() if there is no permanent 259 * Buffer accessors - wrap between begin() and end() if there is no permanent
226 * kernel mapping for this buffer. 260 * kernel mapping for this buffer.
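
Note: to make the contract of the ops table above concrete, here is a hypothetical backend with a single contiguous chunk per SGT. Only the nvgpu_sgt_ops signatures come from this header; the one_chunk_* names and struct are invented for illustration and assume no IOMMU translation.

/* Invented example backend: one contiguous chunk per SGT. */
struct one_chunk_sgl {
	u64 phys;
	u64 length;
};

static void *one_chunk_sgl_next(void *sgl)
{
	return NULL; /* a single entry, so there is never a next node */
}

static u64 one_chunk_sgl_phys(void *sgl)
{
	return ((struct one_chunk_sgl *)sgl)->phys;
}

static u64 one_chunk_sgl_dma(void *sgl)
{
	return ((struct one_chunk_sgl *)sgl)->phys;
}

static u64 one_chunk_sgl_length(void *sgl)
{
	return ((struct one_chunk_sgl *)sgl)->length;
}

static u64 one_chunk_sgl_gpu_addr(struct gk20a *g, void *sgl,
				  struct nvgpu_gmmu_attrs *attrs)
{
	/* Assumes no IOMMU: the GPU sees the raw physical address. */
	return ((struct one_chunk_sgl *)sgl)->phys;
}

static void one_chunk_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	nvgpu_kfree(g, sgt->sgl);
	nvgpu_kfree(g, sgt);
}

static const struct nvgpu_sgt_ops one_chunk_sgt_ops = {
	.sgl_next = one_chunk_sgl_next,
	.sgl_phys = one_chunk_sgl_phys,
	.sgl_dma = one_chunk_sgl_dma,
	.sgl_length = one_chunk_sgl_length,
	.sgl_gpu_addr = one_chunk_sgl_gpu_addr,
	.sgt_free = one_chunk_sgt_free,
};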
diff --git a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h
index de83ca7f..b22c55d0 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h
@@ -91,10 +91,10 @@ page_alloc_slab_page_from_list_entry(struct nvgpu_list_node *node)
91 */ 91 */
92struct nvgpu_page_alloc { 92struct nvgpu_page_alloc {
93 /* 93 /*
94 * nvgpu_mem_sgl for describing the actual allocation. Convenient for 94 * nvgpu_sgt for describing the actual allocation. Convenient for
95 * GMMU mapping. 95 * GMMU mapping.
96 */ 96 */
97 struct nvgpu_mem_sgl *sgl; 97 struct nvgpu_sgt sgt;
98 98
99 int nr_chunks; 99 int nr_chunks;
100 u64 length; 100 u64 length;
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
index ee9b791a..d9324363 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -40,7 +40,7 @@ static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc,
40 40
41static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, 41static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
42 u64 map_offset, 42 u64 map_offset,
43 struct nvgpu_mem_sgl *sgl, 43 struct nvgpu_sgt *sgt,
44 u64 buffer_offset, 44 u64 buffer_offset,
45 u64 size, 45 u64 size,
46 int pgsz_idx, 46 int pgsz_idx,
@@ -66,12 +66,13 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
66 void *handle = NULL; 66 void *handle = NULL;
67 size_t oob_size; 67 size_t oob_size;
68 u8 prot; 68 u8 prot;
69 void *sgl;
69 70
70 gk20a_dbg_fn(""); 71 gk20a_dbg_fn("");
71 72
72 /* FIXME: add support for sparse mappings */ 73 /* FIXME: add support for sparse mappings */
73 74
74 if (WARN_ON(!sgl) || WARN_ON(!g->mm.bypass_smmu)) 75 if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu))
75 return 0; 76 return 0;
76 77
77 if (space_to_skip & (page_size - 1)) 78 if (space_to_skip & (page_size - 1))
@@ -97,7 +98,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
97 err = -EINVAL; 98 err = -EINVAL;
98 goto fail; 99 goto fail;
99 } 100 }
100 101 sgl = sgt->sgl;
101 while (sgl) { 102 while (sgl) {
102 u64 phys_addr; 103 u64 phys_addr;
103 u64 chunk_length; 104 u64 chunk_length;
@@ -106,15 +107,15 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
106 * Cut out sgl ents for space_to_skip. 107 * Cut out sgl ents for space_to_skip.
107 */ 108 */
108 if (space_to_skip && 109 if (space_to_skip &&
109 space_to_skip >= nvgpu_mem_sgl_length(sgl)) { 110 space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
110 space_to_skip -= nvgpu_mem_sgl_length(sgl); 111 space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
111 sgl = nvgpu_mem_sgl_next(sgl); 112 sgl = nvgpu_sgt_get_next(sgt, sgl);
112 continue; 113 continue;
113 } 114 }
114 115
115 phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; 116 phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
116 chunk_length = min(size, 117 chunk_length = min(size,
117 nvgpu_mem_sgl_length(sgl) - space_to_skip); 118 nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
118 119
119 if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr, 120 if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr,
120 chunk_length, &oob_size)) { 121 chunk_length, &oob_size)) {
@@ -124,7 +125,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
124 125
125 space_to_skip = 0; 126 space_to_skip = 0;
126 size -= chunk_length; 127 size -= chunk_length;
127 sgl = nvgpu_mem_sgl_next(sgl); 128 sgl = nvgpu_sgt_get_next(sgt, sgl);
128 129
129 if (size == 0) 130 if (size == 0)
130 break; 131 break;
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 5da6f158..adb01ae5 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -78,7 +78,7 @@ int vgpu_init_mm_support(struct gk20a *g)
78 78
79static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, 79static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
80 u64 map_offset, 80 u64 map_offset,
81 struct nvgpu_mem_sgl *sgl, 81 struct nvgpu_sgt *sgt,
82 u64 buffer_offset, 82 u64 buffer_offset,
83 u64 size, 83 u64 size,
84 int pgsz_idx, 84 int pgsz_idx,
@@ -98,7 +98,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
98 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); 98 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
99 struct tegra_vgpu_cmd_msg msg; 99 struct tegra_vgpu_cmd_msg msg;
100 struct tegra_vgpu_as_map_params *p = &msg.params.as_map; 100 struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
101 u64 addr = nvgpu_mem_sgl_gpu_addr(g, sgl, NULL); 101 u64 addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgt->sgl, NULL);
102 u8 prot; 102 u8 prot;
103 103
104 gk20a_dbg_fn(""); 104 gk20a_dbg_fn("");