Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c    | 144
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c           |  17
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c            |  67
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_mem.c       |  47
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c  | 114
-rw-r--r--  drivers/gpu/nvgpu/common/pramin.c             |  21
6 files changed, 225 insertions, 185 deletions
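For orientation before the individual files: this series routes all scatter-gather walking through a small ops table instead of the concrete nvgpu_mem_sgl list. The sketch below paraphrases the interface from how the ops are used in the hunks that follow; the real definitions live in the nvgpu headers, which are outside this diff, so field order and qualifiers are assumptions.

struct nvgpu_sgt_ops {
	void *(*sgl_next)(void *sgl);
	u64   (*sgl_phys)(void *sgl);
	u64   (*sgl_dma)(void *sgl);
	u64   (*sgl_length)(void *sgl);
	u64   (*sgl_gpu_addr)(struct gk20a *g, void *sgl,
			      struct nvgpu_gmmu_attrs *attrs);
	/* Note: this op takes the whole SGT, not one SGL entry. */
	void  (*sgt_free)(struct gk20a *g, struct nvgpu_sgt *sgt);
};

struct nvgpu_sgt {
	const struct nvgpu_sgt_ops *ops;
	void *sgl;	/* Opaque cursor: scatterlist, nvgpu_mem_sgl, ... */
};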
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index eb54f3fd..8d8909dd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -397,42 +397,59 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
 	return 0;
 }
 
-static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g,
-					struct nvgpu_mem_sgl *sgl)
-{
-	struct nvgpu_mem_sgl *head, *next;
-
-	head = nvgpu_kzalloc(g, sizeof(*sgl));
-	if (!head)
-		return NULL;
-
-	next = head;
-	while (true) {
-		nvgpu_log(g, gpu_dbg_sgl,
-			  " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
-			  sgl->phys, sgl->dma, sgl->length);
-
-		next->dma = sgl->dma;
-		next->phys = sgl->phys;
-		next->length = sgl->length;
-		next->next = NULL;
-
-		sgl = nvgpu_mem_sgl_next(sgl);
-		if (!sgl)
-			break;
-
-		next->next = nvgpu_kzalloc(g, sizeof(*sgl));
-		if (!next->next) {
-			nvgpu_mem_sgl_free(g, head);
-			return NULL;
-		}
-		next = next->next;
-	}
-
-	return head;
+static void *nvgpu_mem_linux_sgl_next(void *sgl)
+{
+	return sg_next((struct scatterlist *)sgl);
+}
+
+static u64 nvgpu_mem_linux_sgl_phys(void *sgl)
+{
+	return (u64)sg_phys((struct scatterlist *)sgl);
+}
+
+static u64 nvgpu_mem_linux_sgl_dma(void *sgl)
+{
+	return (u64)sg_dma_address((struct scatterlist *)sgl);
+}
+
+static u64 nvgpu_mem_linux_sgl_length(void *sgl)
+{
+	return (u64)((struct scatterlist *)sgl)->length;
+}
+
+static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl,
+					struct nvgpu_gmmu_attrs *attrs)
+{
+	if (sg_dma_address((struct scatterlist *)sgl) == 0)
+		return g->ops.mm.gpu_phys_addr(g, attrs,
+				sg_phys((struct scatterlist *)sgl));
+
+	if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
+		return 0;
+
+	return gk20a_mm_smmu_vaddr_translate(g,
+			sg_dma_address((struct scatterlist *)sgl));
+}
+
+static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
+{
+	/*
+	 * Free this SGT. All we do is free the passed SGT. The actual Linux
+	 * SGT/SGL needs to be freed separately.
+	 */
+	nvgpu_kfree(g, sgt);
+}
+
+static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
+	.sgl_next = nvgpu_mem_linux_sgl_next,
+	.sgl_phys = nvgpu_mem_linux_sgl_phys,
+	.sgl_dma = nvgpu_mem_linux_sgl_dma,
+	.sgl_length = nvgpu_mem_linux_sgl_length,
+	.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
+	.sgt_free = nvgpu_mem_linux_sgl_free,
+};
+
+static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
 	struct gk20a *g,
 	struct scatterlist *linux_sgl)
 {
@@ -442,70 +459,31 @@ static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
 	if (!vidmem_alloc)
 		return NULL;
 
-	nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:");
-
-	return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
+	return &vidmem_alloc->sgt;
 }
 
-struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
-					   struct sg_table *sgt)
+struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
 {
-	struct nvgpu_mem_sgl *head, *sgl, *next;
+	struct nvgpu_sgt *nvgpu_sgt;
 	struct scatterlist *linux_sgl = sgt->sgl;
 
 	if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
-		return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl);
+		return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);
 
-	head = nvgpu_kzalloc(g, sizeof(*sgl));
-	if (!head)
+	nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
+	if (!nvgpu_sgt)
 		return NULL;
 
-	nvgpu_log(g, gpu_dbg_sgl, "Making sgl:");
+	nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");
 
-	sgl = head;
-	while (true) {
-		sgl->dma = sg_dma_address(linux_sgl);
-		sgl->phys = sg_phys(linux_sgl);
-		sgl->length = linux_sgl->length;
-
-		/*
-		 * We don't like offsets in the pages here. This will cause
-		 * problems.
-		 */
-		if (WARN_ON(linux_sgl->offset)) {
-			nvgpu_mem_sgl_free(g, head);
-			return NULL;
-		}
-
-		nvgpu_log(g, gpu_dbg_sgl,
-			  " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
-			  sgl->phys, sgl->dma, sgl->length);
-
-		/*
-		 * When there's no more SGL ents for the Linux SGL we are
-		 * done. Don't bother making any more SGL ents for the nvgpu
-		 * SGL.
-		 */
-		linux_sgl = sg_next(linux_sgl);
-		if (!linux_sgl)
-			break;
-
-		next = nvgpu_kzalloc(g, sizeof(*sgl));
-		if (!next) {
-			nvgpu_mem_sgl_free(g, head);
-			return NULL;
-		}
-
-		sgl->next = next;
-		sgl = next;
-	}
+	nvgpu_sgt->sgl = sgt->sgl;
+	nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;
 
-	nvgpu_log(g, gpu_dbg_sgl, "Done!");
-	return head;
+	return nvgpu_sgt;
 }
 
-struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
+struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
 					    struct nvgpu_mem *mem)
 {
-	return nvgpu_mem_sgl_create(g, mem->priv.sgt);
+	return nvgpu_linux_sgt_create(g, mem->priv.sgt);
 }
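With the backend above in place, a Linux-backed nvgpu_sgt is a thin wrapper over the sg_table: the cursor is an opaque struct scatterlist pointer and every query dispatches through nvgpu_linux_sgt_ops. A minimal sketch of a consumer, using only the generic accessors added in common/mm/nvgpu_mem.c further down; sgt_total_bytes() is a hypothetical helper, not part of this change.

/* Illustrative only: sum the chunk lengths of any nvgpu_sgt. */
static u64 sgt_total_bytes(struct nvgpu_sgt *sgt)
{
	void *sgl;
	u64 bytes = 0;

	for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl))
		bytes += nvgpu_sgt_get_length(sgt, sgl);

	return bytes;
}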
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 4a4429dc..2e29f0f7 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -69,19 +69,20 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
 
 	if (aperture == APERTURE_VIDMEM) {
 		struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
-		struct nvgpu_mem_sgl *sgl_vid = alloc->sgl;
+		struct nvgpu_sgt *sgt = &alloc->sgt;
+		void *sgl_vid = sgt->sgl;
 
 		while (sgl_vid) {
 			chunk_align = 1ULL <<
-				__ffs(nvgpu_mem_sgl_phys(sgl_vid) |
-					nvgpu_mem_sgl_length(sgl_vid));
+				__ffs(nvgpu_sgt_get_phys(sgt, sgl_vid)) |
+				nvgpu_sgt_get_length(sgt, sgl_vid);
 
 			if (align)
 				align = min(align, chunk_align);
 			else
 				align = chunk_align;
 
-			sgl_vid = nvgpu_mem_sgl_next(sgl_vid);
+			sgl_vid = nvgpu_sgt_get_next(sgt, sgl_vid);
 		}
 
 		return align;
@@ -242,7 +243,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	struct nvgpu_vm_area *vm_area = NULL;
 	u32 ctag_offset;
 	enum nvgpu_aperture aperture;
-	struct nvgpu_mem_sgl *nvgpu_sgl;
+	struct nvgpu_sgt *nvgpu_sgt;
 
 	/*
 	 * The kind used as part of the key for map caching. HW may
@@ -399,12 +400,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 		ctag_offset += buffer_offset >>
 			       ilog2(g->ops.fb.compression_page_size(g));
 
-	nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt);
+	nvgpu_sgt = nvgpu_linux_sgt_create(g, bfr.sgt);
 
 	/* update gmmu ptes */
 	map_offset = g->ops.mm.gmmu_map(vm,
 					map_offset,
-					nvgpu_sgl,
+					nvgpu_sgt,
 					buffer_offset, /* sg offset */
 					mapping_size,
 					bfr.pgsz_idx,
@@ -419,7 +420,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	if (!map_offset)
 		goto clean_up;
 
-	nvgpu_mem_sgl_free(g, nvgpu_sgl);
+	nvgpu_sgt_free(nvgpu_sgt, g);
 
 	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
 	if (!mapped_buffer) {
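The alignment loop above derives, per chunk, the largest power-of-two alignment the chunk's base and size both support, and keeps the smallest such value across the buffer. A sketch of the same computation as a standalone helper, keeping the pre-conversion grouping 1ULL << __ffs(phys | length) (in the converted hunk the length term now sits outside the __ffs() call); sgt_buffer_alignment() is a hypothetical name, not part of this change.

/* Illustrative only: smallest natural chunk alignment across an SGT. */
static u64 sgt_buffer_alignment(struct nvgpu_sgt *sgt)
{
	u64 align = 0, chunk_align;
	void *sgl;

	for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl)) {
		chunk_align = 1ULL << __ffs(nvgpu_sgt_get_phys(sgt, sgl) |
					    nvgpu_sgt_get_length(sgt, sgl));
		align = align ? min(align, chunk_align) : chunk_align;
	}

	return align;
}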
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 41f5acdd..66bce8f0 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -65,14 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 vaddr;
 
-	struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem);
+	struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem);
 
-	if (!sgl)
+	if (!sgt)
 		return -ENOMEM;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 	vaddr = g->ops.mm.gmmu_map(vm, addr,
-				   sgl, /* sg list */
+				   sgt, /* sg list */
 				   0, /* sg offset */
 				   size,
 				   gmmu_page_size_kernel,
@@ -86,7 +86,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 				   aperture);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 
-	nvgpu_mem_sgl_free(g, sgl);
+	nvgpu_sgt_free(sgt, g);
 
 	if (!vaddr) {
 		nvgpu_err(g, "failed to map buffer!");
@@ -464,7 +464,7 @@ static int __set_pd_level(struct vm_gk20a *vm,
  * VIDMEM version of the update_ptes logic.
  */
 static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
-				struct nvgpu_mem_sgl *sgl,
+				struct nvgpu_sgt *sgt,
 				u64 space_to_skip,
 				u64 virt_addr,
 				u64 length,
@@ -472,8 +472,9 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
 {
 	u64 phys_addr, chunk_length;
 	int err = 0;
+	void *sgl;
 
-	if (!sgl) {
+	if (!sgt) {
 		/*
 		 * This is considered an unmap. Just pass in 0 as the physical
 		 * address for the entire GPU range.
@@ -490,16 +491,17 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
 	 * Otherwise iterate across all the chunks in this allocation and
 	 * map them.
 	 */
+	sgl = sgt->sgl;
 	while (sgl) {
 		if (space_to_skip &&
-		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
-			space_to_skip -= nvgpu_mem_sgl_length(sgl);
-			sgl = nvgpu_mem_sgl_next(sgl);
+		    space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
+			space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
+			sgl = nvgpu_sgt_get_next(sgt, sgl);
 			continue;
 		}
 
-		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
-		chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) -
+		phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
+		chunk_length = min(length, (nvgpu_sgt_get_length(sgt, sgl) -
 					    space_to_skip));
 
 		err = __set_pd_level(vm, &vm->pdb,
@@ -518,27 +520,27 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
 		 */
 		virt_addr += chunk_length;
 		length -= chunk_length;
+		sgl = nvgpu_sgt_get_next(sgt, sgl);
 
 		if (length == 0)
 			break;
-
-		sgl = nvgpu_mem_sgl_next(sgl);
 	}
 
 	return err;
 }
 
 static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
-				struct nvgpu_mem_sgl *sgl,
+				struct nvgpu_sgt *sgt,
 				u64 space_to_skip,
 				u64 virt_addr,
 				u64 length,
 				struct nvgpu_gmmu_attrs *attrs)
 {
-	int err;
 	struct gk20a *g = gk20a_from_vm(vm);
+	void *sgl;
+	int err;
 
-	if (!sgl) {
+	if (!sgt) {
 		/*
 		 * This is considered an unmap. Just pass in 0 as the physical
 		 * address for the entire GPU range.
@@ -559,8 +561,10 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
 	 * mapping is simple since the "physical" address is actually a virtual
 	 * IO address and will be contiguous.
 	 */
+	sgl = sgt->sgl;
+
 	if (!g->mm.bypass_smmu) {
-		u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs);
+		u64 io_addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgl, attrs);
 
 		io_addr += space_to_skip;
 
@@ -586,15 +590,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
 		 * Cut out sgl ents for space_to_skip.
 		 */
 		if (space_to_skip &&
-		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
-			space_to_skip -= nvgpu_mem_sgl_length(sgl);
-			sgl = nvgpu_mem_sgl_next(sgl);
+		    space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
+			space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
+			sgl = nvgpu_sgt_get_next(sgt, sgl);
 			continue;
 		}
 
-		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+		phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
 		chunk_length = min(length,
-				   nvgpu_mem_sgl_length(sgl) - space_to_skip);
+				   nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
 
 		err = __set_pd_level(vm, &vm->pdb,
 				     0,
@@ -606,7 +610,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
 		space_to_skip = 0;
 		virt_addr += chunk_length;
 		length -= chunk_length;
-		sgl = nvgpu_mem_sgl_next(sgl);
+		sgl = nvgpu_sgt_get_next(sgt, sgl);
 
 		if (length == 0)
 			break;
@@ -631,7 +635,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
  * case of SMMU usage.
  */
 static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
-				struct nvgpu_mem_sgl *sgl,
+				struct nvgpu_sgt *sgt,
 				u64 space_to_skip,
 				u64 virt_addr,
 				u64 length,
@@ -669,10 +673,10 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 		   "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
 		   "kind=%#02x APT=%-6s %c%c%c%c%c",
 		   vm->name,
-		   sgl ? "MAP" : "UNMAP",
+		   sgt ? "MAP" : "UNMAP",
 		   virt_addr,
 		   length,
-		   sgl ? nvgpu_mem_sgl_phys(sgl) : 0,
+		   sgt ? nvgpu_sgt_get_phys(sgt, sgt->sgl) : 0,
 		   space_to_skip,
 		   page_size >> 10,
 		   nvgpu_gmmu_perm_str(attrs->rw_flag),
@@ -690,14 +694,14 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 	 */
 	if (attrs->aperture == APERTURE_VIDMEM)
 		err = __nvgpu_gmmu_update_page_table_vidmem(vm,
-							    sgl,
+							    sgt,
 							    space_to_skip,
 							    virt_addr,
 							    length,
 							    attrs);
 	else
 		err = __nvgpu_gmmu_update_page_table_sysmem(vm,
-							    sgl,
+							    sgt,
 							    space_to_skip,
 							    virt_addr,
 							    length,
@@ -706,7 +710,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 	unmap_gmmu_pages(g, &vm->pdb);
 	nvgpu_smp_mb();
 
-	__gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP");
+	__gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");
 
 	return err;
 }
@@ -725,7 +729,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 */
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			  u64 vaddr,
-			  struct nvgpu_mem_sgl *sgl,
+			  struct nvgpu_sgt *sgt,
 			  u64 buffer_offset,
 			  u64 size,
 			  int pgsz_idx,
@@ -774,7 +778,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		allocated = true;
 	}
 
-	err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset,
+	err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset,
 					     vaddr, size, &attrs);
 	if (err) {
 		nvgpu_err(g, "failed to update ptes on map");
@@ -787,6 +791,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	batch->need_tlb_invalidate = true;
 
 	return vaddr;
+
 fail_validate:
 	if (allocated)
 		__nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
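Both page-table updaters above share the same walk: consume whole chunks until space_to_skip is exhausted, then program the remainder chunk by chunk until length runs out. A sketch of that walk as a side-effect-free, hypothetical helper (not part of this change) that only counts how many chunks a mapping would touch:

/* Illustrative only: chunks covered by [space_to_skip, space_to_skip + length). */
static u32 sgt_count_mapped_chunks(struct nvgpu_sgt *sgt,
				   u64 space_to_skip, u64 length)
{
	u32 chunks = 0;
	void *sgl;

	for (sgl = sgt->sgl; sgl && length; sgl = nvgpu_sgt_get_next(sgt, sgl)) {
		u64 chunk_length = nvgpu_sgt_get_length(sgt, sgl);

		if (space_to_skip >= chunk_length) {
			space_to_skip -= chunk_length;
			continue;
		}

		length -= min(length, chunk_length - space_to_skip);
		space_to_skip = 0;
		chunks++;
	}

	return chunks;
}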
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 7296c673..6decec24 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -19,55 +19,34 @@
 
 #include "gk20a/gk20a.h"
 
-struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl)
+void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
 {
-	return sgl->next;
+	return sgt->ops->sgl_next(sgl);
 }
 
-u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl)
+u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl)
 {
-	return sgl->phys;
+	return sgt->ops->sgl_phys(sgl);
 }
 
-u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl)
+u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl)
 {
-	return sgl->dma;
+	return sgt->ops->sgl_dma(sgl);
 }
 
-u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl)
+u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl)
 {
-	return sgl->length;
+	return sgt->ops->sgl_length(sgl);
 }
 
-/*
- * This builds a GPU address for the %sgl based on whether an IOMMU is present
- * or not. It also handles turning the physical address into the true GPU
- * physical address that should be programmed into the page tables.
- */
-u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
+u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl,
 			   struct nvgpu_gmmu_attrs *attrs)
 {
-	if (nvgpu_mem_sgl_dma(sgl) == 0)
-		return g->ops.mm.gpu_phys_addr(g, attrs,
-					       nvgpu_mem_sgl_phys(sgl));
-
-	if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE)
-		return 0;
-
-	return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl));
+	return sgt->ops->sgl_gpu_addr(g, sgl, attrs);
 }
 
-void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl)
+void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g)
 {
-	struct nvgpu_mem_sgl *next;
-
-	/*
-	 * Free each of the elements. We expect each element to have been
-	 * nvgpu_k[mz]alloc()ed.
-	 */
-	while (sgl) {
-		next = nvgpu_mem_sgl_next(sgl);
-		nvgpu_kfree(g, sgl);
-		sgl = next;
-	}
+	if (sgt && sgt->ops->sgt_free)
+		sgt->ops->sgt_free(g, sgt);
 }
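The wrappers above are pure dispatch, so adding another backing store only means filling in an nvgpu_sgt_ops table. A minimal illustrative backend for a single physically contiguous, non-IOMMU chunk, built around a made-up struct contig_chunk; nothing in nvgpu defines this backend, it only shows what each callback is expected to return under the interface the diff introduces.

/* Hypothetical backend: one contiguous chunk, no next entry. */
struct contig_chunk {
	u64 phys;
	u64 length;
};

static void *contig_sgl_next(void *sgl)
{
	return NULL;	/* Single chunk, so the walk ends immediately. */
}

static u64 contig_sgl_phys(void *sgl)
{
	return ((struct contig_chunk *)sgl)->phys;
}

static u64 contig_sgl_dma(void *sgl)
{
	return ((struct contig_chunk *)sgl)->phys;	/* No IOMMU assumed. */
}

static u64 contig_sgl_length(void *sgl)
{
	return ((struct contig_chunk *)sgl)->length;
}

static u64 contig_sgl_gpu_addr(struct gk20a *g, void *sgl,
			       struct nvgpu_gmmu_attrs *attrs)
{
	return ((struct contig_chunk *)sgl)->phys;
}

static void contig_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	nvgpu_kfree(g, sgt);	/* The chunk itself is owned elsewhere. */
}

static const struct nvgpu_sgt_ops contig_sgt_ops = {
	.sgl_next = contig_sgl_next,
	.sgl_phys = contig_sgl_phys,
	.sgl_dma = contig_sgl_dma,
	.sgl_length = contig_sgl_length,
	.sgl_gpu_addr = contig_sgl_gpu_addr,
	.sgt_free = contig_sgt_free,
};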
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 6d92b457..9c35f528 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -143,20 +143,93 @@ static void nvgpu_page_release_co(struct nvgpu_allocator *a,
 	nvgpu_alloc_release_carveout(&va->source_allocator, co);
 }
 
+static void *nvgpu_page_alloc_sgl_next(void *sgl)
+{
+	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
+
+	return nvgpu_sgl->next;
+}
+
+static u64 nvgpu_page_alloc_sgl_phys(void *sgl)
+{
+	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
+
+	return nvgpu_sgl->phys;
+}
+
+static u64 nvgpu_page_alloc_sgl_dma(void *sgl)
+{
+	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
+
+	return nvgpu_sgl->dma;
+}
+
+static u64 nvgpu_page_alloc_sgl_length(void *sgl)
+{
+	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
+
+	return nvgpu_sgl->length;
+}
+
+static u64 nvgpu_page_alloc_sgl_gpu_addr(struct gk20a *g, void *sgl,
+					 struct nvgpu_gmmu_attrs *attrs)
+{
+	struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
+
+	return nvgpu_sgl->phys;
+}
+
+static void nvgpu_page_alloc_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
+{
+	/*
+	 * No-op here. The free is handled by the page_alloc free() functions.
+	 */
+}
+
+/*
+ * These implement the generic scatter gather ops for pages allocated
+ * by the page allocator. however, the primary aim for this, is of course,
+ * vidmem.
+ */
+static const struct nvgpu_sgt_ops page_alloc_sgl_ops = {
+	.sgl_next = nvgpu_page_alloc_sgl_next,
+	.sgl_phys = nvgpu_page_alloc_sgl_phys,
+	.sgl_dma = nvgpu_page_alloc_sgl_dma,
+	.sgl_length = nvgpu_page_alloc_sgl_length,
+	.sgl_gpu_addr = nvgpu_page_alloc_sgl_gpu_addr,
+	.sgt_free = nvgpu_page_alloc_sgt_free,
+};
+
+/*
+ * This actually frees the sgl memory. Used by the page_alloc free() functions.
+ */
+static void nvgpu_page_alloc_sgl_proper_free(struct gk20a *g,
+					     struct nvgpu_mem_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *next;
+
+	while (sgl) {
+		next = sgl->next;
+		nvgpu_kfree(g, sgl);
+		sgl = next;
+	}
+}
+
 static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
 			       struct nvgpu_page_alloc *alloc,
 			       bool free_buddy_alloc)
 {
-	struct nvgpu_mem_sgl *sgl = alloc->sgl;
+	struct nvgpu_mem_sgl *sgl = alloc->sgt.sgl;
 
 	if (free_buddy_alloc) {
 		while (sgl) {
-			nvgpu_free(&a->source_allocator, sgl->phys);
-			sgl = nvgpu_mem_sgl_next(sgl);
+			nvgpu_free(&a->source_allocator,
+				   nvgpu_sgt_get_phys(&alloc->sgt, sgl));
+			sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
 		}
 	}
 
-	nvgpu_mem_sgl_free(a->owner->g, alloc->sgl);
+	nvgpu_page_alloc_sgl_proper_free(a->owner->g, sgl);
 	nvgpu_kmem_cache_free(a->alloc_cache, alloc);
 }
 
@@ -306,7 +379,7 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a,
 	alloc->length = slab_page->slab_size;
 	alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
 
-	sgl = alloc->sgl;
+	sgl = alloc->sgt.sgl;
 	sgl->phys = alloc->base;
 	sgl->dma = alloc->base;
 	sgl->length = alloc->length;
@@ -338,13 +411,16 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
 		palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
 		goto fail;
 	}
+
+	alloc->sgt.ops = &page_alloc_sgl_ops;
+
 	sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
 	if (!sgl) {
 		palloc_dbg(a, "OOM: could not alloc sgl struct!\n");
 		goto fail;
 	}
 
-	alloc->sgl = sgl;
+	alloc->sgt.sgl = sgl;
 	err = __do_slab_alloc(a, slab, alloc);
 	if (err)
 		goto fail;
@@ -432,6 +508,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 	memset(alloc, 0, sizeof(*alloc));
 
 	alloc->length = pages << a->page_shift;
+	alloc->sgt.ops = &page_alloc_sgl_ops;
 
 	while (pages) {
 		u64 chunk_addr = 0;
@@ -495,7 +572,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 		if (prev_sgl)
 			prev_sgl->next = sgl;
 		else
-			alloc->sgl = sgl;
+			alloc->sgt.sgl = sgl;
 
 		prev_sgl = sgl;
 
@@ -503,12 +580,12 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 	}
 
 	alloc->nr_chunks = i;
-	alloc->base = alloc->sgl->phys;
+	alloc->base = ((struct nvgpu_mem_sgl *)alloc->sgt.sgl)->phys;
 
 	return alloc;
 
 fail_cleanup:
-	sgl = alloc->sgl;
+	sgl = alloc->sgt.sgl;
 	while (sgl) {
 		struct nvgpu_mem_sgl *next = sgl->next;
 
@@ -542,13 +619,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
 
 	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
 		   pages << a->page_shift, pages, alloc->base);
-	sgl = alloc->sgl;
+	sgl = alloc->sgt.sgl;
 	while (sgl) {
 		palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
 			   i++,
-			   nvgpu_mem_sgl_phys(sgl),
-			   nvgpu_mem_sgl_length(sgl));
-		sgl = sgl->next;
+			   nvgpu_sgt_get_phys(&alloc->sgt, sgl),
+			   nvgpu_sgt_get_length(&alloc->sgt, sgl));
+		sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
 	}
 	palloc_dbg(a, "Alloc done\n");
 
@@ -655,6 +732,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
 	if (!alloc || !sgl)
 		goto fail;
 
+	alloc->sgt.ops = &page_alloc_sgl_ops;
 	alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
 	if (!alloc->base) {
 		WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base);
@@ -663,7 +741,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
 
 	alloc->nr_chunks = 1;
 	alloc->length = length;
-	alloc->sgl = sgl;
+	alloc->sgt.sgl = sgl;
 
 	sgl->phys = alloc->base;
 	sgl->dma = alloc->base;
@@ -708,13 +786,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 
 	palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
 		   alloc->base, aligned_len, pages);
-	sgl = alloc->sgl;
+	sgl = alloc->sgt.sgl;
 	while (sgl) {
 		palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
 			   i++,
-			   nvgpu_mem_sgl_phys(sgl),
-			   nvgpu_mem_sgl_length(sgl));
-		sgl = sgl->next;
+			   nvgpu_sgt_get_phys(&alloc->sgt, sgl),
+			   nvgpu_sgt_get_length(&alloc->sgt, sgl));
+		sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
 	}
 
 	a->nr_fixed_allocs++;
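After this change a vidmem nvgpu_page_alloc carries its chunk list as an embedded table (alloc->sgt) wired to page_alloc_sgl_ops, so consumers no longer reach into nvgpu_mem_sgl directly. A sketch mirroring the palloc_dbg() loops above as a hypothetical debug helper (not part of this change), assuming the nvgpu_page_alloc definition now embeds "struct nvgpu_sgt sgt":

/* Illustrative only: log every chunk of a vidmem page allocation. */
static void vidmem_alloc_dump_chunks(struct gk20a *g,
				     struct nvgpu_page_alloc *alloc)
{
	struct nvgpu_sgt *sgt = &alloc->sgt;
	void *sgl;
	int i = 0;

	for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl))
		nvgpu_log(g, gpu_dbg_sgl, " Chunk %2d: 0x%010llx + 0x%llx",
			  i++,
			  nvgpu_sgt_get_phys(sgt, sgl),
			  nvgpu_sgt_get_length(sgt, sgl));
}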
diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c
index bb7d930e..ae9c9b1f 100644
--- a/drivers/gpu/nvgpu/common/pramin.c
+++ b/drivers/gpu/nvgpu/common/pramin.c
@@ -84,24 +84,23 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
 		u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
 {
 	struct nvgpu_page_alloc *alloc = NULL;
-	struct nvgpu_mem_sgl *sgl;
+	struct nvgpu_sgt *sgt;
+	void *sgl;
 	u32 byteoff, start_reg, until_end, n;
 
 	alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
-	sgl = alloc->sgl;
-	while (sgl) {
-		if (offset >= nvgpu_mem_sgl_length(sgl)) {
-			offset -= nvgpu_mem_sgl_length(sgl);
-			sgl = sgl->next;
-		} else {
+	sgt = &alloc->sgt;
+	for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl)) {
+		if (offset >= nvgpu_sgt_get_length(sgt, sgl))
+			offset -= nvgpu_sgt_get_length(sgt, sgl);
+		else
 			break;
-		}
 	}
 
 	while (size) {
-		u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl);
+		u32 sgl_len = (u32)nvgpu_sgt_get_length(sgt, sgl);
 
-		byteoff = g->ops.pramin.enter(g, mem, sgl,
+		byteoff = g->ops.pramin.enter(g, mem, sgt, sgl,
 					      offset / sizeof(u32));
 		start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32));
 		until_end = SZ_1M - (byteoff & (SZ_1M - 1));
@@ -117,7 +116,7 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
 		size -= n;
 
 		if (n == (sgl_len - offset)) {
-			sgl = nvgpu_mem_sgl_next(sgl);
+			sgl = nvgpu_sgt_get_next(sgt, sgl);
 			offset = 0;
 		} else {
 			offset += n;
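The first loop in nvgpu_pramin_access_batched() above just advances to the chunk that contains the starting byte offset. Pulled out as a hypothetical helper for clarity (not part of this change), the same lookup reads:

/* Illustrative only: find the chunk containing *offset; leave the
 * remaining in-chunk offset in *offset. Returns NULL past the end. */
static void *pramin_find_chunk(struct nvgpu_sgt *sgt, u32 *offset)
{
	void *sgl;

	for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl)) {
		if (*offset >= nvgpu_sgt_get_length(sgt, sgl))
			*offset -= nvgpu_sgt_get_length(sgt, sgl);
		else
			break;
	}

	return sgl;
}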