path: root/drivers/gpu/nvgpu/common
author     Sunny He <suhe@nvidia.com>                           2017-08-15 15:01:04 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-09-22 15:55:24 -0400
commit     17c581d75514c32d1e8c1e416beb33b3ccce22a5 (patch)
tree       a25d063f19b8e1f83f61af418f3aa2ac32fe0cce /drivers/gpu/nvgpu/common
parent     0090ee5aca268a3c359f34c74b8c521df3bd8593 (diff)
gpu: nvgpu: SGL passthrough implementation
The basic nvgpu_mem_sgl implementation provides support for OS specific
scatter-gather list implementations by simply copying them node by node.
This is inefficient, taking extra time and memory.

This patch implements an nvgpu_sgt struct to act as a header which is
inserted at the front of any scatter-gather list implementation. This
labels every struct with a set of ops which can be used to interact with
the attached scatter-gather list.

Since nvgpu common code only has to interact with these function
pointers, any sgl implementation can be used. Initialization only
requires the allocation of a single struct, removing the need to copy or
iterate through the sgl being converted.

Jira NVGPU-186

Change-Id: I2994f804a4a4cc141b702e987e9081d8560ba2e8
Signed-off-by: Sunny He <suhe@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1541426
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c    144
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c            17
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c             67
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_mem.c        47
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c  114
-rw-r--r--  drivers/gpu/nvgpu/common/pramin.c              21
6 files changed, 225 insertions, 185 deletions
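
The core of the change is the nvgpu_sgt header and its ops table, which the hunks below use throughout. The following is a minimal standalone C sketch of that abstraction; the member names mirror the patch, but the exact declarations live in nvgpu headers outside this diff, so treat this as an illustrative model rather than the driver's literal definitions.

#include <stdint.h>
#include <stddef.h>

struct gk20a;                /* driver instance, opaque in this sketch */
struct nvgpu_gmmu_attrs;     /* GMMU mapping attributes, opaque here */
struct nvgpu_sgt;

/* Per-implementation ops: how to walk and query one kind of SGL node. */
struct nvgpu_sgt_ops {
	void     *(*sgl_next)(void *sgl);
	uint64_t  (*sgl_phys)(void *sgl);
	uint64_t  (*sgl_dma)(void *sgl);
	uint64_t  (*sgl_length)(void *sgl);
	uint64_t  (*sgl_gpu_addr)(struct gk20a *g, void *sgl,
				  struct nvgpu_gmmu_attrs *attrs);
	void      (*sgt_free)(struct gk20a *g, struct nvgpu_sgt *sgt);
};

/*
 * Header inserted at the front of any SGL implementation: one ops
 * pointer plus an opaque pointer to the first implementation node.
 */
struct nvgpu_sgt {
	const struct nvgpu_sgt_ops *ops;
	void *sgl;
};

/* Generic accessors -- all that nvgpu common code ever touches. */
static inline void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
{
	return sgt->ops->sgl_next(sgl);
}

static inline uint64_t nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl)
{
	return sgt->ops->sgl_phys(sgl);
}

static inline uint64_t nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl)
{
	return sgt->ops->sgl_length(sgl);
}

Because common code only ever calls through the ops pointers, any scatter-gather list representation can be attached, and converting one costs a single small allocation instead of a node-by-node copy.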
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index eb54f3fd..8d8909dd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -397,42 +397,59 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
397 return 0; 397 return 0;
398} 398}
399 399
400static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g, 400static void *nvgpu_mem_linux_sgl_next(void *sgl)
401 struct nvgpu_mem_sgl *sgl)
402{ 401{
403 struct nvgpu_mem_sgl *head, *next; 402 return sg_next((struct scatterlist *)sgl);
403}
404 404
405 head = nvgpu_kzalloc(g, sizeof(*sgl)); 405static u64 nvgpu_mem_linux_sgl_phys(void *sgl)
406 if (!head) 406{
407 return NULL; 407 return (u64)sg_phys((struct scatterlist *)sgl);
408}
408 409
409 next = head; 410static u64 nvgpu_mem_linux_sgl_dma(void *sgl)
410 while (true) { 411{
411 nvgpu_log(g, gpu_dbg_sgl, 412 return (u64)sg_dma_address((struct scatterlist *)sgl);
412 " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx", 413}
413 sgl->phys, sgl->dma, sgl->length);
414
415 next->dma = sgl->dma;
416 next->phys = sgl->phys;
417 next->length = sgl->length;
418 next->next = NULL;
419
420 sgl = nvgpu_mem_sgl_next(sgl);
421 if (!sgl)
422 break;
423
424 next->next = nvgpu_kzalloc(g, sizeof(*sgl));
425 if (!next->next) {
426 nvgpu_mem_sgl_free(g, head);
427 return NULL;
428 }
429 next = next->next;
430 }
431 414
432 return head; 415static u64 nvgpu_mem_linux_sgl_length(void *sgl)
416{
417 return (u64)((struct scatterlist *)sgl)->length;
433} 418}
434 419
435static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem( 420static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl,
421 struct nvgpu_gmmu_attrs *attrs)
422{
423 if (sg_dma_address((struct scatterlist *)sgl) == 0)
424 return g->ops.mm.gpu_phys_addr(g, attrs,
425 sg_phys((struct scatterlist *)sgl));
426
427 if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
428 return 0;
429
430 return gk20a_mm_smmu_vaddr_translate(g,
431 sg_dma_address((struct scatterlist *)sgl));
432}
433
434static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
435{
436 /*
437 * Free this SGT. All we do is free the passed SGT. The actual Linux
438 * SGT/SGL needs to be freed separately.
439 */
440 nvgpu_kfree(g, sgt);
441}
442
443static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
444 .sgl_next = nvgpu_mem_linux_sgl_next,
445 .sgl_phys = nvgpu_mem_linux_sgl_phys,
446 .sgl_dma = nvgpu_mem_linux_sgl_dma,
447 .sgl_length = nvgpu_mem_linux_sgl_length,
448 .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
449 .sgt_free = nvgpu_mem_linux_sgl_free,
450};
451
452static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
436 struct gk20a *g, 453 struct gk20a *g,
437 struct scatterlist *linux_sgl) 454 struct scatterlist *linux_sgl)
438{ 455{
@@ -442,70 +459,31 @@ static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
442 if (!vidmem_alloc) 459 if (!vidmem_alloc)
443 return NULL; 460 return NULL;
444 461
445 nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:"); 462 return &vidmem_alloc->sgt;
446
447 return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
448} 463}
449 464
450struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g, 465struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
451 struct sg_table *sgt)
452{ 466{
453 struct nvgpu_mem_sgl *head, *sgl, *next; 467 struct nvgpu_sgt *nvgpu_sgt;
454 struct scatterlist *linux_sgl = sgt->sgl; 468 struct scatterlist *linux_sgl = sgt->sgl;
455 469
456 if (is_vidmem_page_alloc(sg_dma_address(linux_sgl))) 470 if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
457 return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl); 471 return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);
458 472
459 head = nvgpu_kzalloc(g, sizeof(*sgl)); 473 nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
460 if (!head) 474 if (!nvgpu_sgt)
461 return NULL; 475 return NULL;
462 476
463 nvgpu_log(g, gpu_dbg_sgl, "Making sgl:"); 477 nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");
464 478
465 sgl = head; 479 nvgpu_sgt->sgl = sgt->sgl;
466 while (true) { 480 nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;
467 sgl->dma = sg_dma_address(linux_sgl);
468 sgl->phys = sg_phys(linux_sgl);
469 sgl->length = linux_sgl->length;
470
471 /*
472 * We don't like offsets in the pages here. This will cause
473 * problems.
474 */
475 if (WARN_ON(linux_sgl->offset)) {
476 nvgpu_mem_sgl_free(g, head);
477 return NULL;
478 }
479
480 nvgpu_log(g, gpu_dbg_sgl,
481 " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
482 sgl->phys, sgl->dma, sgl->length);
483
484 /*
485 * When there's no more SGL ents for the Linux SGL we are
486 * done. Don't bother making any more SGL ents for the nvgpu
487 * SGL.
488 */
489 linux_sgl = sg_next(linux_sgl);
490 if (!linux_sgl)
491 break;
492
493 next = nvgpu_kzalloc(g, sizeof(*sgl));
494 if (!next) {
495 nvgpu_mem_sgl_free(g, head);
496 return NULL;
497 }
498
499 sgl->next = next;
500 sgl = next;
501 }
502 481
503 nvgpu_log(g, gpu_dbg_sgl, "Done!"); 482 return nvgpu_sgt;
504 return head;
505} 483}
506 484
507struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g, 485struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
508 struct nvgpu_mem *mem) 486 struct nvgpu_mem *mem)
509{ 487{
510 return nvgpu_mem_sgl_create(g, mem->priv.sgt); 488 return nvgpu_linux_sgt_create(g, mem->priv.sgt);
511} 489}
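
In the Linux backend above, each op simply casts the opaque sgl cookie back to struct scatterlist *, and nvgpu_linux_sgt_create() allocates only the small header. The sketch below models that pattern outside the kernel, building on the nvgpu_sgt/nvgpu_sgt_ops sketch shown after the diffstat; toy_sg_node and toy_sgt_create are stand-ins for struct scatterlist and the real constructor, not driver API.

#include <stdint.h>
#include <stdlib.h>

/* Stand-in for struct scatterlist: a singly linked chunk descriptor. */
struct toy_sg_node {
	struct toy_sg_node *next;
	uint64_t phys;
	uint64_t dma;
	uint64_t length;
};

/* Each op casts the opaque cookie back to the native node type. */
static void *toy_sgl_next(void *sgl)
{
	return ((struct toy_sg_node *)sgl)->next;
}

static uint64_t toy_sgl_phys(void *sgl)
{
	return ((struct toy_sg_node *)sgl)->phys;
}

static uint64_t toy_sgl_length(void *sgl)
{
	return ((struct toy_sg_node *)sgl)->length;
}

static void toy_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	/* Only the header is freed; the native list has its own owner. */
	(void)g;
	free(sgt);
}

static const struct nvgpu_sgt_ops toy_sgt_ops = {
	.sgl_next   = toy_sgl_next,
	.sgl_phys   = toy_sgl_phys,
	.sgl_length = toy_sgl_length,
	.sgt_free   = toy_sgt_free,
};

/* O(1) conversion: allocate one header and point it at the native list. */
struct nvgpu_sgt *toy_sgt_create(struct toy_sg_node *list)
{
	struct nvgpu_sgt *sgt = calloc(1, sizeof(*sgt));

	if (!sgt)
		return NULL;

	sgt->ops = &toy_sgt_ops;
	sgt->sgl = list;
	return sgt;
}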
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 4a4429dc..2e29f0f7 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -69,19 +69,20 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
69 69
70 if (aperture == APERTURE_VIDMEM) { 70 if (aperture == APERTURE_VIDMEM) {
71 struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); 71 struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
72 struct nvgpu_mem_sgl *sgl_vid = alloc->sgl; 72 struct nvgpu_sgt *sgt = &alloc->sgt;
73 void *sgl_vid = sgt->sgl;
73 74
74 while (sgl_vid) { 75 while (sgl_vid) {
75 chunk_align = 1ULL << 76 chunk_align = 1ULL <<
76 __ffs(nvgpu_mem_sgl_phys(sgl_vid) | 77 __ffs(nvgpu_sgt_get_phys(sgt, sgl_vid)) |
77 nvgpu_mem_sgl_length(sgl_vid)); 78 nvgpu_sgt_get_length(sgt, sgl_vid);
78 79
79 if (align) 80 if (align)
80 align = min(align, chunk_align); 81 align = min(align, chunk_align);
81 else 82 else
82 align = chunk_align; 83 align = chunk_align;
83 84
84 sgl_vid = nvgpu_mem_sgl_next(sgl_vid); 85 sgl_vid = nvgpu_sgt_get_next(sgt, sgl_vid);
85 } 86 }
86 87
87 return align; 88 return align;
@@ -242,7 +243,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
242 struct nvgpu_vm_area *vm_area = NULL; 243 struct nvgpu_vm_area *vm_area = NULL;
243 u32 ctag_offset; 244 u32 ctag_offset;
244 enum nvgpu_aperture aperture; 245 enum nvgpu_aperture aperture;
245 struct nvgpu_mem_sgl *nvgpu_sgl; 246 struct nvgpu_sgt *nvgpu_sgt;
246 247
247 /* 248 /*
248 * The kind used as part of the key for map caching. HW may 249 * The kind used as part of the key for map caching. HW may
@@ -399,12 +400,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
399 ctag_offset += buffer_offset >> 400 ctag_offset += buffer_offset >>
400 ilog2(g->ops.fb.compression_page_size(g)); 401 ilog2(g->ops.fb.compression_page_size(g));
401 402
402 nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt); 403 nvgpu_sgt = nvgpu_linux_sgt_create(g, bfr.sgt);
403 404
404 /* update gmmu ptes */ 405 /* update gmmu ptes */
405 map_offset = g->ops.mm.gmmu_map(vm, 406 map_offset = g->ops.mm.gmmu_map(vm,
406 map_offset, 407 map_offset,
407 nvgpu_sgl, 408 nvgpu_sgt,
408 buffer_offset, /* sg offset */ 409 buffer_offset, /* sg offset */
409 mapping_size, 410 mapping_size,
410 bfr.pgsz_idx, 411 bfr.pgsz_idx,
@@ -419,7 +420,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
419 if (!map_offset) 420 if (!map_offset)
420 goto clean_up; 421 goto clean_up;
421 422
422 nvgpu_mem_sgl_free(g, nvgpu_sgl); 423 nvgpu_sgt_free(nvgpu_sgt, g);
423 424
424 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); 425 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
425 if (!mapped_buffer) { 426 if (!mapped_buffer) {
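
The first hunk in vm.c recomputes a vidmem buffer's alignment by walking its chunks through the new accessors: each chunk contributes 1 << __ffs(phys | length), i.e. the largest power of two dividing both its address and its size, and the smallest value across chunks wins. A standalone sketch of that calculation, assuming the accessor sketch given earlier:

#include <stdint.h>

/* Largest power of two dividing x (x is assumed non-zero). */
static uint64_t lowest_set_bit(uint64_t x)
{
	return x & (~x + 1);
}

/*
 * Minimum per-chunk alignment of a scatter-gather table, mirroring the
 * vidmem branch of nvgpu_get_buffer_alignment().
 */
static uint64_t buffer_alignment(struct nvgpu_sgt *sgt)
{
	uint64_t align = 0;
	void *sgl;

	for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl)) {
		uint64_t chunk_align = lowest_set_bit(
			nvgpu_sgt_get_phys(sgt, sgl) |
			nvgpu_sgt_get_length(sgt, sgl));

		align = align ? (align < chunk_align ? align : chunk_align)
			      : chunk_align;
	}

	return align;
}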
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 41f5acdd..66bce8f0 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -65,14 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
65 struct gk20a *g = gk20a_from_vm(vm); 65 struct gk20a *g = gk20a_from_vm(vm);
66 u64 vaddr; 66 u64 vaddr;
67 67
68 struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem); 68 struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem);
69 69
70 if (!sgl) 70 if (!sgt)
71 return -ENOMEM; 71 return -ENOMEM;
72 72
73 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 73 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
74 vaddr = g->ops.mm.gmmu_map(vm, addr, 74 vaddr = g->ops.mm.gmmu_map(vm, addr,
75 sgl, /* sg list */ 75 sgt, /* sg list */
76 0, /* sg offset */ 76 0, /* sg offset */
77 size, 77 size,
78 gmmu_page_size_kernel, 78 gmmu_page_size_kernel,
@@ -86,7 +86,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
86 aperture); 86 aperture);
87 nvgpu_mutex_release(&vm->update_gmmu_lock); 87 nvgpu_mutex_release(&vm->update_gmmu_lock);
88 88
89 nvgpu_mem_sgl_free(g, sgl); 89 nvgpu_sgt_free(sgt, g);
90 90
91 if (!vaddr) { 91 if (!vaddr) {
92 nvgpu_err(g, "failed to map buffer!"); 92 nvgpu_err(g, "failed to map buffer!");
@@ -464,7 +464,7 @@ static int __set_pd_level(struct vm_gk20a *vm,
464 * VIDMEM version of the update_ptes logic. 464 * VIDMEM version of the update_ptes logic.
465 */ 465 */
466static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, 466static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
467 struct nvgpu_mem_sgl *sgl, 467 struct nvgpu_sgt *sgt,
468 u64 space_to_skip, 468 u64 space_to_skip,
469 u64 virt_addr, 469 u64 virt_addr,
470 u64 length, 470 u64 length,
@@ -472,8 +472,9 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
472{ 472{
473 u64 phys_addr, chunk_length; 473 u64 phys_addr, chunk_length;
474 int err = 0; 474 int err = 0;
475 void *sgl;
475 476
476 if (!sgl) { 477 if (!sgt) {
477 /* 478 /*
478 * This is considered an unmap. Just pass in 0 as the physical 479 * This is considered an unmap. Just pass in 0 as the physical
479 * address for the entire GPU range. 480 * address for the entire GPU range.
@@ -490,16 +491,17 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
490 * Otherwise iterate across all the chunks in this allocation and 491 * Otherwise iterate across all the chunks in this allocation and
491 * map them. 492 * map them.
492 */ 493 */
494 sgl = sgt->sgl;
493 while (sgl) { 495 while (sgl) {
494 if (space_to_skip && 496 if (space_to_skip &&
495 space_to_skip >= nvgpu_mem_sgl_length(sgl)) { 497 space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
496 space_to_skip -= nvgpu_mem_sgl_length(sgl); 498 space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
497 sgl = nvgpu_mem_sgl_next(sgl); 499 sgl = nvgpu_sgt_get_next(sgt, sgl);
498 continue; 500 continue;
499 } 501 }
500 502
501 phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; 503 phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
502 chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) - 504 chunk_length = min(length, (nvgpu_sgt_get_length(sgt, sgl) -
503 space_to_skip)); 505 space_to_skip));
504 506
505 err = __set_pd_level(vm, &vm->pdb, 507 err = __set_pd_level(vm, &vm->pdb,
@@ -518,27 +520,27 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
518 */ 520 */
519 virt_addr += chunk_length; 521 virt_addr += chunk_length;
520 length -= chunk_length; 522 length -= chunk_length;
523 sgl = nvgpu_sgt_get_next(sgt, sgl);
521 524
522 if (length == 0) 525 if (length == 0)
523 break; 526 break;
524
525 sgl = nvgpu_mem_sgl_next(sgl);
526 } 527 }
527 528
528 return err; 529 return err;
529} 530}
530 531
531static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, 532static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
532 struct nvgpu_mem_sgl *sgl, 533 struct nvgpu_sgt *sgt,
533 u64 space_to_skip, 534 u64 space_to_skip,
534 u64 virt_addr, 535 u64 virt_addr,
535 u64 length, 536 u64 length,
536 struct nvgpu_gmmu_attrs *attrs) 537 struct nvgpu_gmmu_attrs *attrs)
537{ 538{
538 int err;
539 struct gk20a *g = gk20a_from_vm(vm); 539 struct gk20a *g = gk20a_from_vm(vm);
540 void *sgl;
541 int err;
540 542
541 if (!sgl) { 543 if (!sgt) {
542 /* 544 /*
543 * This is considered an unmap. Just pass in 0 as the physical 545 * This is considered an unmap. Just pass in 0 as the physical
544 * address for the entire GPU range. 546 * address for the entire GPU range.
@@ -559,8 +561,10 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
559 * mapping is simple since the "physical" address is actually a virtual 561 * mapping is simple since the "physical" address is actually a virtual
560 * IO address and will be contiguous. 562 * IO address and will be contiguous.
561 */ 563 */
564 sgl = sgt->sgl;
565
562 if (!g->mm.bypass_smmu) { 566 if (!g->mm.bypass_smmu) {
563 u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs); 567 u64 io_addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgl, attrs);
564 568
565 io_addr += space_to_skip; 569 io_addr += space_to_skip;
566 570
@@ -586,15 +590,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
586 * Cut out sgl ents for space_to_skip. 590 * Cut out sgl ents for space_to_skip.
587 */ 591 */
588 if (space_to_skip && 592 if (space_to_skip &&
589 space_to_skip >= nvgpu_mem_sgl_length(sgl)) { 593 space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
590 space_to_skip -= nvgpu_mem_sgl_length(sgl); 594 space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
591 sgl = nvgpu_mem_sgl_next(sgl); 595 sgl = nvgpu_sgt_get_next(sgt, sgl);
592 continue; 596 continue;
593 } 597 }
594 598
595 phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; 599 phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
596 chunk_length = min(length, 600 chunk_length = min(length,
597 nvgpu_mem_sgl_length(sgl) - space_to_skip); 601 nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
598 602
599 err = __set_pd_level(vm, &vm->pdb, 603 err = __set_pd_level(vm, &vm->pdb,
600 0, 604 0,
@@ -606,7 +610,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
606 space_to_skip = 0; 610 space_to_skip = 0;
607 virt_addr += chunk_length; 611 virt_addr += chunk_length;
608 length -= chunk_length; 612 length -= chunk_length;
609 sgl = nvgpu_mem_sgl_next(sgl); 613 sgl = nvgpu_sgt_get_next(sgt, sgl);
610 614
611 if (length == 0) 615 if (length == 0)
612 break; 616 break;
@@ -631,7 +635,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
631 * case of SMMU usage. 635 * case of SMMU usage.
632 */ 636 */
633static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, 637static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
634 struct nvgpu_mem_sgl *sgl, 638 struct nvgpu_sgt *sgt,
635 u64 space_to_skip, 639 u64 space_to_skip,
636 u64 virt_addr, 640 u64 virt_addr,
637 u64 length, 641 u64 length,
@@ -669,10 +673,10 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
669 "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " 673 "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
670 "kind=%#02x APT=%-6s %c%c%c%c%c", 674 "kind=%#02x APT=%-6s %c%c%c%c%c",
671 vm->name, 675 vm->name,
672 sgl ? "MAP" : "UNMAP", 676 sgt ? "MAP" : "UNMAP",
673 virt_addr, 677 virt_addr,
674 length, 678 length,
675 sgl ? nvgpu_mem_sgl_phys(sgl) : 0, 679 sgt ? nvgpu_sgt_get_phys(sgt, sgt->sgl) : 0,
676 space_to_skip, 680 space_to_skip,
677 page_size >> 10, 681 page_size >> 10,
678 nvgpu_gmmu_perm_str(attrs->rw_flag), 682 nvgpu_gmmu_perm_str(attrs->rw_flag),
@@ -690,14 +694,14 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
690 */ 694 */
691 if (attrs->aperture == APERTURE_VIDMEM) 695 if (attrs->aperture == APERTURE_VIDMEM)
692 err = __nvgpu_gmmu_update_page_table_vidmem(vm, 696 err = __nvgpu_gmmu_update_page_table_vidmem(vm,
693 sgl, 697 sgt,
694 space_to_skip, 698 space_to_skip,
695 virt_addr, 699 virt_addr,
696 length, 700 length,
697 attrs); 701 attrs);
698 else 702 else
699 err = __nvgpu_gmmu_update_page_table_sysmem(vm, 703 err = __nvgpu_gmmu_update_page_table_sysmem(vm,
700 sgl, 704 sgt,
701 space_to_skip, 705 space_to_skip,
702 virt_addr, 706 virt_addr,
703 length, 707 length,
@@ -706,7 +710,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
706 unmap_gmmu_pages(g, &vm->pdb); 710 unmap_gmmu_pages(g, &vm->pdb);
707 nvgpu_smp_mb(); 711 nvgpu_smp_mb();
708 712
709 __gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP"); 713 __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");
710 714
711 return err; 715 return err;
712} 716}
@@ -725,7 +729,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
725 */ 729 */
726u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, 730u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
727 u64 vaddr, 731 u64 vaddr,
728 struct nvgpu_mem_sgl *sgl, 732 struct nvgpu_sgt *sgt,
729 u64 buffer_offset, 733 u64 buffer_offset,
730 u64 size, 734 u64 size,
731 int pgsz_idx, 735 int pgsz_idx,
@@ -774,7 +778,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
774 allocated = true; 778 allocated = true;
775 } 779 }
776 780
777 err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset, 781 err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset,
778 vaddr, size, &attrs); 782 vaddr, size, &attrs);
779 if (err) { 783 if (err) {
780 nvgpu_err(g, "failed to update ptes on map"); 784 nvgpu_err(g, "failed to update ptes on map");
@@ -787,6 +791,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
787 batch->need_tlb_invalidate = true; 791 batch->need_tlb_invalidate = true;
788 792
789 return vaddr; 793 return vaddr;
794
790fail_validate: 795fail_validate:
791 if (allocated) 796 if (allocated)
792 __nvgpu_vm_free_va(vm, vaddr, pgsz_idx); 797 __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
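
Both page-table update paths in gmmu.c share the same iteration shape: skip whole chunks until space_to_skip is consumed, then program min(length, chunk_length - space_to_skip) bytes from each remaining chunk. A compact sketch of that walk, using the generic accessors from the earlier sketch and a hypothetical program_ptes() callback standing in for __set_pd_level():

#include <stdint.h>

typedef int (*program_ptes_fn)(uint64_t phys_addr, uint64_t virt_addr,
			       uint64_t chunk_length);

/* Walk an SGT, skipping 'space_to_skip' bytes, mapping 'length' bytes. */
static int map_sgt_range(struct nvgpu_sgt *sgt, uint64_t space_to_skip,
			 uint64_t virt_addr, uint64_t length,
			 program_ptes_fn program_ptes)
{
	void *sgl = sgt->sgl;
	int err = 0;

	while (sgl && length) {
		uint64_t chunk_len = nvgpu_sgt_get_length(sgt, sgl);
		uint64_t phys, mapped;

		/* Burn whole chunks that fall entirely inside the skip. */
		if (space_to_skip && space_to_skip >= chunk_len) {
			space_to_skip -= chunk_len;
			sgl = nvgpu_sgt_get_next(sgt, sgl);
			continue;
		}

		phys = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
		mapped = chunk_len - space_to_skip;
		if (mapped > length)
			mapped = length;

		err = program_ptes(phys, virt_addr, mapped);
		if (err)
			break;

		/* The skip only applies to the first partially used chunk. */
		space_to_skip = 0;
		virt_addr += mapped;
		length -= mapped;
		sgl = nvgpu_sgt_get_next(sgt, sgl);
	}

	return err;
}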
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 7296c673..6decec24 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -19,55 +19,34 @@
19 19
20#include "gk20a/gk20a.h" 20#include "gk20a/gk20a.h"
21 21
22struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl) 22void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
23{ 23{
24 return sgl->next; 24 return sgt->ops->sgl_next(sgl);
25} 25}
26 26
27u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl) 27u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl)
28{ 28{
29 return sgl->phys; 29 return sgt->ops->sgl_phys(sgl);
30} 30}
31 31
32u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl) 32u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl)
33{ 33{
34 return sgl->dma; 34 return sgt->ops->sgl_dma(sgl);
35} 35}
36 36
37u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl) 37u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl)
38{ 38{
39 return sgl->length; 39 return sgt->ops->sgl_length(sgl);
40} 40}
41 41
42/* 42u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl,
43 * This builds a GPU address for the %sgl based on whether an IOMMU is present
44 * or not. It also handles turning the physical address into the true GPU
45 * physical address that should be programmed into the page tables.
46 */
47u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
48 struct nvgpu_gmmu_attrs *attrs) 43 struct nvgpu_gmmu_attrs *attrs)
49{ 44{
50 if (nvgpu_mem_sgl_dma(sgl) == 0) 45 return sgt->ops->sgl_gpu_addr(g, sgl, attrs);
51 return g->ops.mm.gpu_phys_addr(g, attrs,
52 nvgpu_mem_sgl_phys(sgl));
53
54 if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE)
55 return 0;
56
57 return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl));
58} 46}
59 47
60void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl) 48void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g)
61{ 49{
62 struct nvgpu_mem_sgl *next; 50 if (sgt && sgt->ops->sgt_free)
63 51 sgt->ops->sgt_free(g, sgt);
64 /*
65 * Free each of the elements. We expect each element to have been
66 * nvgpu_k[mz]alloc()ed.
67 */
68 while (sgl) {
69 next = nvgpu_mem_sgl_next(sgl);
70 nvgpu_kfree(g, sgl);
71 sgl = next;
72 }
73} 52}
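
The wrappers above are all that common code needs to stay agnostic of the backing SGL type. As a small usage example (the helper below is hypothetical, not part of the patch), summing the total byte length described by any SGT:

#include <stdint.h>
#include <stddef.h>

/* Total number of bytes described by an SGT, whatever its backend. */
static uint64_t sgt_total_length(struct nvgpu_sgt *sgt)
{
	uint64_t total = 0;
	void *sgl;

	for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl))
		total += nvgpu_sgt_get_length(sgt, sgl);

	return total;
}

Note also that nvgpu_sgt_free() checks both the SGT pointer and the sgt_free op before calling it, so a backend may omit the op entirely when there is nothing to release.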
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 6d92b457..9c35f528 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -143,20 +143,93 @@ static void nvgpu_page_release_co(struct nvgpu_allocator *a,
143 nvgpu_alloc_release_carveout(&va->source_allocator, co); 143 nvgpu_alloc_release_carveout(&va->source_allocator, co);
144} 144}
145 145
146static void *nvgpu_page_alloc_sgl_next(void *sgl)
147{
148 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
149
150 return nvgpu_sgl->next;
151}
152
153static u64 nvgpu_page_alloc_sgl_phys(void *sgl)
154{
155 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
156
157 return nvgpu_sgl->phys;
158}
159
160static u64 nvgpu_page_alloc_sgl_dma(void *sgl)
161{
162 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
163
164 return nvgpu_sgl->dma;
165}
166
167static u64 nvgpu_page_alloc_sgl_length(void *sgl)
168{
169 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
170
171 return nvgpu_sgl->length;
172}
173
174static u64 nvgpu_page_alloc_sgl_gpu_addr(struct gk20a *g, void *sgl,
175 struct nvgpu_gmmu_attrs *attrs)
176{
177 struct nvgpu_mem_sgl *nvgpu_sgl = sgl;
178
179 return nvgpu_sgl->phys;
180}
181
182static void nvgpu_page_alloc_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
183{
184 /*
185 * No-op here. The free is handled by the page_alloc free() functions.
186 */
187}
188
189/*
190 * These implement the generic scatter gather ops for pages allocated
191 * by the page allocator. however, the primary aim for this, is of course,
192 * vidmem.
193 */
194static const struct nvgpu_sgt_ops page_alloc_sgl_ops = {
195 .sgl_next = nvgpu_page_alloc_sgl_next,
196 .sgl_phys = nvgpu_page_alloc_sgl_phys,
197 .sgl_dma = nvgpu_page_alloc_sgl_dma,
198 .sgl_length = nvgpu_page_alloc_sgl_length,
199 .sgl_gpu_addr = nvgpu_page_alloc_sgl_gpu_addr,
200 .sgt_free = nvgpu_page_alloc_sgt_free,
201};
202
203/*
204 * This actually frees the sgl memory. Used by the page_alloc free() functions.
205 */
206static void nvgpu_page_alloc_sgl_proper_free(struct gk20a *g,
207 struct nvgpu_mem_sgl *sgl)
208{
209 struct nvgpu_mem_sgl *next;
210
211 while (sgl) {
212 next = sgl->next;
213 nvgpu_kfree(g, sgl);
214 sgl = next;
215 }
216}
217
146static void __nvgpu_free_pages(struct nvgpu_page_allocator *a, 218static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
147 struct nvgpu_page_alloc *alloc, 219 struct nvgpu_page_alloc *alloc,
148 bool free_buddy_alloc) 220 bool free_buddy_alloc)
149{ 221{
150 struct nvgpu_mem_sgl *sgl = alloc->sgl; 222 struct nvgpu_mem_sgl *sgl = alloc->sgt.sgl;
151 223
152 if (free_buddy_alloc) { 224 if (free_buddy_alloc) {
153 while (sgl) { 225 while (sgl) {
154 nvgpu_free(&a->source_allocator, sgl->phys); 226 nvgpu_free(&a->source_allocator,
155 sgl = nvgpu_mem_sgl_next(sgl); 227 nvgpu_sgt_get_phys(&alloc->sgt, sgl));
228 sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
156 } 229 }
157 } 230 }
158 231
159 nvgpu_mem_sgl_free(a->owner->g, alloc->sgl); 232 nvgpu_page_alloc_sgl_proper_free(a->owner->g, sgl);
160 nvgpu_kmem_cache_free(a->alloc_cache, alloc); 233 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
161} 234}
162 235
@@ -306,7 +379,7 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a,
306 alloc->length = slab_page->slab_size; 379 alloc->length = slab_page->slab_size;
307 alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); 380 alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
308 381
309 sgl = alloc->sgl; 382 sgl = alloc->sgt.sgl;
310 sgl->phys = alloc->base; 383 sgl->phys = alloc->base;
311 sgl->dma = alloc->base; 384 sgl->dma = alloc->base;
312 sgl->length = alloc->length; 385 sgl->length = alloc->length;
@@ -338,13 +411,16 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
338 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); 411 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
339 goto fail; 412 goto fail;
340 } 413 }
414
415 alloc->sgt.ops = &page_alloc_sgl_ops;
416
341 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); 417 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
342 if (!sgl) { 418 if (!sgl) {
343 palloc_dbg(a, "OOM: could not alloc sgl struct!\n"); 419 palloc_dbg(a, "OOM: could not alloc sgl struct!\n");
344 goto fail; 420 goto fail;
345 } 421 }
346 422
347 alloc->sgl = sgl; 423 alloc->sgt.sgl = sgl;
348 err = __do_slab_alloc(a, slab, alloc); 424 err = __do_slab_alloc(a, slab, alloc);
349 if (err) 425 if (err)
350 goto fail; 426 goto fail;
@@ -432,6 +508,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
432 memset(alloc, 0, sizeof(*alloc)); 508 memset(alloc, 0, sizeof(*alloc));
433 509
434 alloc->length = pages << a->page_shift; 510 alloc->length = pages << a->page_shift;
511 alloc->sgt.ops = &page_alloc_sgl_ops;
435 512
436 while (pages) { 513 while (pages) {
437 u64 chunk_addr = 0; 514 u64 chunk_addr = 0;
@@ -495,7 +572,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
495 if (prev_sgl) 572 if (prev_sgl)
496 prev_sgl->next = sgl; 573 prev_sgl->next = sgl;
497 else 574 else
498 alloc->sgl = sgl; 575 alloc->sgt.sgl = sgl;
499 576
500 prev_sgl = sgl; 577 prev_sgl = sgl;
501 578
@@ -503,12 +580,12 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
503 } 580 }
504 581
505 alloc->nr_chunks = i; 582 alloc->nr_chunks = i;
506 alloc->base = alloc->sgl->phys; 583 alloc->base = ((struct nvgpu_mem_sgl *)alloc->sgt.sgl)->phys;
507 584
508 return alloc; 585 return alloc;
509 586
510fail_cleanup: 587fail_cleanup:
511 sgl = alloc->sgl; 588 sgl = alloc->sgt.sgl;
512 while (sgl) { 589 while (sgl) {
513 struct nvgpu_mem_sgl *next = sgl->next; 590 struct nvgpu_mem_sgl *next = sgl->next;
514 591
@@ -542,13 +619,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
542 619
543 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", 620 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
544 pages << a->page_shift, pages, alloc->base); 621 pages << a->page_shift, pages, alloc->base);
545 sgl = alloc->sgl; 622 sgl = alloc->sgt.sgl;
546 while (sgl) { 623 while (sgl) {
547 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", 624 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
548 i++, 625 i++,
549 nvgpu_mem_sgl_phys(sgl), 626 nvgpu_sgt_get_phys(&alloc->sgt, sgl),
550 nvgpu_mem_sgl_length(sgl)); 627 nvgpu_sgt_get_length(&alloc->sgt, sgl));
551 sgl = sgl->next; 628 sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
552 } 629 }
553 palloc_dbg(a, "Alloc done\n"); 630 palloc_dbg(a, "Alloc done\n");
554 631
@@ -655,6 +732,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
655 if (!alloc || !sgl) 732 if (!alloc || !sgl)
656 goto fail; 733 goto fail;
657 734
735 alloc->sgt.ops = &page_alloc_sgl_ops;
658 alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); 736 alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
659 if (!alloc->base) { 737 if (!alloc->base) {
660 WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base); 738 WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base);
@@ -663,7 +741,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
663 741
664 alloc->nr_chunks = 1; 742 alloc->nr_chunks = 1;
665 alloc->length = length; 743 alloc->length = length;
666 alloc->sgl = sgl; 744 alloc->sgt.sgl = sgl;
667 745
668 sgl->phys = alloc->base; 746 sgl->phys = alloc->base;
669 sgl->dma = alloc->base; 747 sgl->dma = alloc->base;
@@ -708,13 +786,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
708 786
709 palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", 787 palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
710 alloc->base, aligned_len, pages); 788 alloc->base, aligned_len, pages);
711 sgl = alloc->sgl; 789 sgl = alloc->sgt.sgl;
712 while (sgl) { 790 while (sgl) {
713 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", 791 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
714 i++, 792 i++,
715 nvgpu_mem_sgl_phys(sgl), 793 nvgpu_sgt_get_phys(&alloc->sgt, sgl),
716 nvgpu_mem_sgl_length(sgl)); 794 nvgpu_sgt_get_length(&alloc->sgt, sgl));
717 sgl = sgl->next; 795 sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
718 } 796 }
719 797
720 a->nr_fixed_allocs++; 798 a->nr_fixed_allocs++;
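
For vidmem, the page allocator above keeps its chunks in a linked list of nvgpu_mem_sgl nodes and exposes them through page_alloc_sgl_ops, where the GPU address is simply the physical address (vidmem never sits behind the SMMU) and the SGT free is a no-op because the allocator frees the chunk list itself. It also embeds the nvgpu_sgt header directly in the allocation, so handing it to common code needs no extra allocation. A reduced sketch of that backend, reusing the header/ops types from the first sketch; names like vidmem_chunk are illustrative, not the driver's:

#include <stdint.h>

/* One contiguous vidmem chunk; the allocator links these together. */
struct vidmem_chunk {
	struct vidmem_chunk *next;
	uint64_t phys;
	uint64_t dma;
	uint64_t length;
};

static void *vidmem_sgl_next(void *sgl)
{
	return ((struct vidmem_chunk *)sgl)->next;
}

static uint64_t vidmem_sgl_phys(void *sgl)
{
	return ((struct vidmem_chunk *)sgl)->phys;
}

static uint64_t vidmem_sgl_length(void *sgl)
{
	return ((struct vidmem_chunk *)sgl)->length;
}

/* Vidmem is not behind the SMMU, so the GPU address is the physical one. */
static uint64_t vidmem_sgl_gpu_addr(struct gk20a *g, void *sgl,
				    struct nvgpu_gmmu_attrs *attrs)
{
	(void)g;
	(void)attrs;
	return ((struct vidmem_chunk *)sgl)->phys;
}

/* The allocator owns the chunk list, so freeing the SGT is a no-op. */
static void vidmem_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	(void)g;
	(void)sgt;
}

static const struct nvgpu_sgt_ops vidmem_sgt_ops = {
	.sgl_next     = vidmem_sgl_next,
	.sgl_phys     = vidmem_sgl_phys,
	.sgl_length   = vidmem_sgl_length,
	.sgl_gpu_addr = vidmem_sgl_gpu_addr,
	.sgt_free     = vidmem_sgt_free,
};

/*
 * The allocation embeds its nvgpu_sgt header, so the vidmem lookup path
 * can return a pointer to it directly with no conversion step.
 */
struct vidmem_page_alloc {
	uint64_t base;
	uint64_t length;
	struct nvgpu_sgt sgt;   /* .ops = &vidmem_sgt_ops, .sgl = chunk list */
};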
diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c
index bb7d930e..ae9c9b1f 100644
--- a/drivers/gpu/nvgpu/common/pramin.c
+++ b/drivers/gpu/nvgpu/common/pramin.c
@@ -84,24 +84,23 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
84 u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) 84 u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
85{ 85{
86 struct nvgpu_page_alloc *alloc = NULL; 86 struct nvgpu_page_alloc *alloc = NULL;
87 struct nvgpu_mem_sgl *sgl; 87 struct nvgpu_sgt *sgt;
88 void *sgl;
88 u32 byteoff, start_reg, until_end, n; 89 u32 byteoff, start_reg, until_end, n;
89 90
90 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); 91 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
91 sgl = alloc->sgl; 92 sgt = &alloc->sgt;
92 while (sgl) { 93 for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl)) {
93 if (offset >= nvgpu_mem_sgl_length(sgl)) { 94 if (offset >= nvgpu_sgt_get_length(sgt, sgl))
94 offset -= nvgpu_mem_sgl_length(sgl); 95 offset -= nvgpu_sgt_get_length(sgt, sgl);
95 sgl = sgl->next; 96 else
96 } else {
97 break; 97 break;
98 }
99 } 98 }
100 99
101 while (size) { 100 while (size) {
102 u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl); 101 u32 sgl_len = (u32)nvgpu_sgt_get_length(sgt, sgl);
103 102
104 byteoff = g->ops.pramin.enter(g, mem, sgl, 103 byteoff = g->ops.pramin.enter(g, mem, sgt, sgl,
105 offset / sizeof(u32)); 104 offset / sizeof(u32));
106 start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); 105 start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32));
107 until_end = SZ_1M - (byteoff & (SZ_1M - 1)); 106 until_end = SZ_1M - (byteoff & (SZ_1M - 1));
@@ -117,7 +116,7 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
117 size -= n; 116 size -= n;
118 117
119 if (n == (sgl_len - offset)) { 118 if (n == (sgl_len - offset)) {
120 sgl = nvgpu_mem_sgl_next(sgl); 119 sgl = nvgpu_sgt_get_next(sgt, sgl);
121 offset = 0; 120 offset = 0;
122 } else { 121 } else {
123 offset += n; 122 offset += n;
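
The PRAMIN batched-access loop above first seeks to the chunk containing the requested byte offset using the generic iterators, then issues windowed accesses bounded by the chunk length and the 1 MB PRAMIN window. A minimal sketch of the seek step, again assuming the accessor sketch from earlier (the helper name is illustrative):

#include <stdint.h>
#include <stddef.h>

/*
 * Advance to the chunk containing byte 'offset' and rewrite 'offset'
 * to be relative to that chunk, as nvgpu_pramin_access_batched() does.
 */
static void *seek_to_offset(struct nvgpu_sgt *sgt, uint32_t *offset)
{
	void *sgl;

	for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl)) {
		uint64_t len = nvgpu_sgt_get_length(sgt, sgl);

		if (*offset >= len)
			*offset -= (uint32_t)len;
		else
			break;
	}

	return sgl;
}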