Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/gmmu.c')
-rw-r--r--    drivers/gpu/nvgpu/common/mm/gmmu.c    109
 1 file changed, 49 insertions(+), 60 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 7f486d68..41f5acdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -65,11 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
         struct gk20a *g = gk20a_from_vm(vm);
         u64 vaddr;
 
-        struct sg_table *sgt = mem->priv.sgt;
+        struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem);
+
+        if (!sgl)
+                return -ENOMEM;
 
         nvgpu_mutex_acquire(&vm->update_gmmu_lock);
         vaddr = g->ops.mm.gmmu_map(vm, addr,
-                                   sgt,    /* sg table */
+                                   sgl,    /* sg list */
                                    0,      /* sg offset */
                                    size,
                                    gmmu_page_size_kernel,
@@ -82,8 +85,11 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
                                    NULL,   /* mapping_batch handle */
                                    aperture);
         nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+        nvgpu_mem_sgl_free(g, sgl);
+
         if (!vaddr) {
-                nvgpu_err(g, "failed to allocate va space");
+                nvgpu_err(g, "failed to map buffer!");
                 return 0;
         }
 
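For readers following the new flow in __nvgpu_gmmu_map() above: the patch builds a temporary SGL from the nvgpu_mem, hands it to the HAL mapping hook under the VM lock, frees it unconditionally, and treats a returned VA of 0 as failure. The stand-alone sketch below models only that create/map/free lifetime; chunk_list, hw_map() and map_mem() are hypothetical stand-ins, not nvgpu APIs, and the real g->ops.mm.gmmu_map() takes many more arguments than shown here.

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical chunk-list node standing in for the new SGL type. */
    struct chunk_list {
            unsigned long long phys, length;
            struct chunk_list *next;
    };

    /* Stand-in for g->ops.mm.gmmu_map(): returns a GPU VA, or 0 on failure. */
    static unsigned long long hw_map(const struct chunk_list *sgl,
                                     unsigned long long size)
    {
            return (sgl && size) ? 0x100000ULL : 0ULL;
    }

    static unsigned long long map_mem(unsigned long long phys,
                                      unsigned long long size)
    {
            struct chunk_list *sgl = malloc(sizeof(*sgl));
            unsigned long long vaddr;

            if (!sgl)
                    return 0;
            sgl->phys = phys;
            sgl->length = size;
            sgl->next = NULL;

            /* The temporary list only lives across the mapping call ... */
            vaddr = hw_map(sgl, size);
            /* ... and is released right after it, as in the patch above. */
            free(sgl);

            if (!vaddr)
                    fprintf(stderr, "failed to map buffer!\n");
            return vaddr;
    }

    int main(void)
    {
            printf("GPU VA: 0x%llx\n", map_mem(0x80000000ULL, 0x1000ULL));
            return 0;
    }

The only point of the model is that the list does not outlive the call that consumes it, which is why the mapping hook cannot retain the pointer.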
@@ -91,7 +97,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 }
 
 /*
- * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings.
+ * Map a nvgpu_mem into the GMMU. This is for kernel space to use.
  */
 u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
                    struct nvgpu_mem *mem,
@@ -106,7 +112,7 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
 }
 
 /*
- * Like nvgpu_gmmu_map() except it can work on a fixed address instead.
+ * Like nvgpu_gmmu_map() except this can work on a fixed address.
  */
 u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
                          struct nvgpu_mem *mem,
@@ -407,7 +413,7 @@ static int __set_pd_level(struct vm_gk20a *vm,
          */
         target_addr = next_pd ?
                 nvgpu_pde_phys_addr(g, next_pd) :
-                g->ops.mm.gpu_phys_addr(g, attrs, phys_addr);
+                phys_addr;
 
         l->update_entry(vm, l,
                         pd, pd_idx,
@@ -458,18 +464,16 @@ static int __set_pd_level(struct vm_gk20a *vm,
  * VIDMEM version of the update_ptes logic.
  */
 static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
-                                                 struct sg_table *sgt,
+                                                 struct nvgpu_mem_sgl *sgl,
                                                  u64 space_to_skip,
                                                  u64 virt_addr,
                                                  u64 length,
                                                  struct nvgpu_gmmu_attrs *attrs)
 {
-        struct nvgpu_page_alloc *alloc = NULL;
-        struct page_alloc_chunk *chunk = NULL;
         u64 phys_addr, chunk_length;
         int err = 0;
 
-        if (!sgt) {
+        if (!sgl) {
                 /*
                  * This is considered an unmap. Just pass in 0 as the physical
                  * address for the entire GPU range.
@@ -482,22 +486,21 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
                 return err;
         }
 
-        alloc = get_vidmem_page_alloc(sgt->sgl);
-
         /*
          * Otherwise iterate across all the chunks in this allocation and
          * map them.
          */
-        nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-                                  page_alloc_chunk, list_entry) {
+        while (sgl) {
                 if (space_to_skip &&
-                    space_to_skip >= chunk->length) {
-                        space_to_skip -= chunk->length;
+                    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
+                        space_to_skip -= nvgpu_mem_sgl_length(sgl);
+                        sgl = nvgpu_mem_sgl_next(sgl);
                         continue;
                 }
 
-                phys_addr = chunk->base + space_to_skip;
-                chunk_length = min(length, (chunk->length - space_to_skip));
+                phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+                chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) -
+                                            space_to_skip));
 
                 err = __set_pd_level(vm, &vm->pdb,
                                      0,
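The rewritten loop above is a generic skip-then-map walk over discontiguous chunks: leading chunks wholly covered by space_to_skip are consumed, the first partially covered chunk is mapped from its interior, and every mapping length is clamped with min() against the bytes still to map. A small self-contained model of that walk is sketched below, assuming a plain array of {phys, length} chunks; map_range() is a hypothetical stand-in for __set_pd_level() and none of this is the driver's actual code.

    #include <stdio.h>

    struct chunk {
            unsigned long long phys;
            unsigned long long length;
    };

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Stand-in for __set_pd_level(): just report what would be programmed. */
    static void map_range(unsigned long long phys, unsigned long long virt,
                          unsigned long long len)
    {
            printf("map phys 0x%llx -> virt 0x%llx (+0x%llx)\n", phys, virt, len);
    }

    static void walk(const struct chunk *sgl, int nr,
                     unsigned long long space_to_skip,
                     unsigned long long virt_addr,
                     unsigned long long length)
    {
            int i;

            for (i = 0; i < nr && length; i++) {
                    unsigned long long chunk_length;

                    /* Chunks wholly covered by the offset are skipped outright. */
                    if (space_to_skip && space_to_skip >= sgl[i].length) {
                            space_to_skip -= sgl[i].length;
                            continue;
                    }

                    chunk_length = MIN(length, sgl[i].length - space_to_skip);
                    map_range(sgl[i].phys + space_to_skip, virt_addr, chunk_length);

                    space_to_skip = 0;      /* only the first mapped chunk is partial */
                    virt_addr += chunk_length;
                    length -= chunk_length;
            }
    }

    int main(void)
    {
            const struct chunk sgl[] = {
                    { 0x1000,  0x1000 },
                    { 0x8000,  0x2000 },
                    { 0x20000, 0x1000 },
            };

            /* Skip 0x1800 bytes: chunk 0 is skipped, chunk 1 mapped from 0x8800. */
            walk(sgl, 3, 0x1800, 0x40000000ULL, 0x1800);
            return 0;
    }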
@@ -518,23 +521,24 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
 
                 if (length == 0)
                         break;
+
+                sgl = nvgpu_mem_sgl_next(sgl);
         }
 
         return err;
 }
 
 static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
-                                                 struct sg_table *sgt,
+                                                 struct nvgpu_mem_sgl *sgl,
                                                  u64 space_to_skip,
                                                  u64 virt_addr,
                                                  u64 length,
                                                  struct nvgpu_gmmu_attrs *attrs)
 {
         int err;
-        struct scatterlist *sgl;
         struct gk20a *g = gk20a_from_vm(vm);
 
-        if (!sgt) {
+        if (!sgl) {
                 /*
                  * This is considered an unmap. Just pass in 0 as the physical
                  * address for the entire GPU range.
@@ -548,19 +552,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
         }
 
         /*
-         * At this point we have a Linux scatter-gather list pointing to some
-         * number of discontiguous chunks of memory. Iterate over that list and
-         * generate a GMMU map call for each chunk. There are two possibilities:
-         * either the IOMMU is enabled or not. When the IOMMU is enabled the
-         * mapping is simple since the "physical" address is actually a virtual
-         * IO address and will be contiguous. The no-IOMMU case is more
-         * complicated. We will have to iterate over the SGT and do a separate
-         * map for each chunk of the SGT.
+         * At this point we have a scatter-gather list pointing to some number
+         * of discontiguous chunks of memory. We must iterate over that list and
+         * generate a GMMU map call for each chunk. There are two possibilities:
+         * either an IOMMU is enabled or not. When an IOMMU is enabled the
+         * mapping is simple since the "physical" address is actually a virtual
+         * IO address and will be contiguous.
          */
-        sgl = sgt->sgl;
-
         if (!g->mm.bypass_smmu) {
-                u64 io_addr = nvgpu_mem_get_addr_sgl(g, sgl);
+                u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs);
 
                 io_addr += space_to_skip;
 
@@ -585,14 +585,16 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
                 /*
                  * Cut out sgl ents for space_to_skip.
                  */
-                if (space_to_skip && space_to_skip >= sgl->length) {
-                        space_to_skip -= sgl->length;
-                        sgl = sg_next(sgl);
+                if (space_to_skip &&
+                    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
+                        space_to_skip -= nvgpu_mem_sgl_length(sgl);
+                        sgl = nvgpu_mem_sgl_next(sgl);
                         continue;
                 }
 
-                phys_addr = sg_phys(sgl) + space_to_skip;
-                chunk_length = min(length, sgl->length - space_to_skip);
+                phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+                chunk_length = min(length,
+                                   nvgpu_mem_sgl_length(sgl) - space_to_skip);
 
                 err = __set_pd_level(vm, &vm->pdb,
                                      0,
@@ -600,13 +602,11 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
                                      virt_addr,
                                      chunk_length,
                                      attrs);
-                if (err)
-                        return err;
 
                 space_to_skip = 0;
                 virt_addr += chunk_length;
                 length -= chunk_length;
-                sgl = sg_next(sgl);
+                sgl = nvgpu_mem_sgl_next(sgl);
 
                 if (length == 0)
                         break;
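The comment in the sysmem path above captures the two cases: behind the SMMU the buffer shows up as one contiguous IO virtual range, so a single offset-adjusted mapping covers it, whereas the bypass case falls back to the per-chunk walk modeled earlier. The fragment below sketches only that branch; use_iommu, io_base and map_sysmem() are hypothetical stand-ins for g->mm.bypass_smmu, nvgpu_mem_sgl_gpu_addr() and the surrounding driver logic.

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for one GMMU programming call (__set_pd_level() in the driver). */
    static void map_range(unsigned long long phys, unsigned long long virt,
                          unsigned long long len)
    {
            printf("map 0x%llx -> 0x%llx (+0x%llx)\n", phys, virt, len);
    }

    /*
     * Hypothetical sketch: with an IOMMU the aggregate IO virtual address is
     * contiguous, so one offset-adjusted call is enough; otherwise each
     * discontiguous chunk needs its own call (per-chunk loop omitted here).
     */
    static void map_sysmem(bool use_iommu, unsigned long long io_base,
                           unsigned long long space_to_skip,
                           unsigned long long virt_addr,
                           unsigned long long length)
    {
            if (use_iommu) {
                    map_range(io_base + space_to_skip, virt_addr, length);
                    return;
            }
            /* no-IOMMU: walk the chunk list as in the earlier sketch */
    }

    int main(void)
    {
            map_sysmem(true, 0x7000000ULL, 0x200ULL, 0x40000000ULL, 0x4000ULL);
            return 0;
    }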
@@ -624,22 +624,20 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
  * implementations. But the logic around that is generic to all chips. Every
  * chip has some number of PDE levels and then a PTE level.
  *
- * Each chunk of the incoming SGT is sent to the chip specific implementation
+ * Each chunk of the incoming SGL is sent to the chip specific implementation
  * of page table update.
  *
  * [*] Note: the "physical" address may actually be an IO virtual address in the
  * case of SMMU usage.
  */
 static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
-                                          struct sg_table *sgt,
+                                          struct nvgpu_mem_sgl *sgl,
                                           u64 space_to_skip,
                                           u64 virt_addr,
                                           u64 length,
                                           struct nvgpu_gmmu_attrs *attrs)
 {
         struct gk20a *g = gk20a_from_vm(vm);
-        struct nvgpu_page_alloc *alloc;
-        u64 phys_addr = 0;
         u32 page_size;
         int err;
 
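The block comment above describes the common structure behind __nvgpu_gmmu_update_page_table(): a chip-defined sequence of levels, where every level but the last holds PDEs pointing at the next directory and the final level holds PTEs pointing at the mapped memory (see also the target_addr selection in the __set_pd_level() hunk earlier). The sketch below is a conceptual model of that shape under assumed field widths; struct level, write_entry() and set_levels() are hypothetical and do not mirror the driver's level descriptors.

    #include <stdio.h>

    /* Hypothetical per-level description: which VA bits this level decodes. */
    struct level {
            unsigned int lo_bit;    /* lowest VA bit covered by this level's index */
            unsigned int num_bits;  /* width of the index field                    */
    };

    /* Stand-in for l->update_entry(): record what would be written. */
    static void write_entry(int level, unsigned int idx,
                            unsigned long long target, int is_pte)
    {
            printf("level %d idx %u -> %s 0x%llx\n",
                   level, idx, is_pte ? "PTE" : "PDE", target);
    }

    /* Walk the PDE levels, then program the PTE at the last level. */
    static void set_levels(const struct level *levels, int num_levels,
                           unsigned long long virt_addr,
                           unsigned long long phys_addr)
    {
            int i;

            for (i = 0; i < num_levels; i++) {
                    unsigned int idx = (virt_addr >> levels[i].lo_bit) &
                                       ((1u << levels[i].num_bits) - 1);
                    int is_pte = (i == num_levels - 1);

                    /*
                     * A PDE would point at the next level's directory
                     * (nvgpu_pde_phys_addr() in the driver); a dummy address
                     * stands in for it here. The PTE points at the memory.
                     */
                    write_entry(i, idx,
                                is_pte ? phys_addr : 0xd1000ULL + i * 0x1000,
                                is_pte);
            }
    }

    int main(void)
    {
            /* Two PDE levels and a PTE level, e.g. a 38-bit VA with 4K pages. */
            const struct level levels[] = {
                    { 30, 8 },      /* PDE0: bits 37..30 */
                    { 21, 9 },      /* PDE1: bits 29..21 */
                    { 12, 9 },      /* PTE : bits 20..12 */
            };

            set_levels(levels, 3, 0x123456000ULL, 0x80000000ULL);
            return 0;
    }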
@@ -665,25 +663,16 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
                 return err;
         }
 
-        if (sgt) {
-                if (attrs->aperture == APERTURE_VIDMEM) {
-                        alloc = get_vidmem_page_alloc(sgt->sgl);
-
-                        phys_addr = alloc->base;
-                } else
-                        phys_addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl);
-        }
-
         __gmmu_dbg(g, attrs,
                    "vm=%s "
                    "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx "
                    "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
                    "kind=%#02x APT=%-6s %c%c%c%c%c",
                    vm->name,
-                   sgt ? "MAP" : "UNMAP",
+                   sgl ? "MAP" : "UNMAP",
                    virt_addr,
                    length,
-                   phys_addr,
+                   sgl ? nvgpu_mem_sgl_phys(sgl) : 0,
                    space_to_skip,
                    page_size >> 10,
                    nvgpu_gmmu_perm_str(attrs->rw_flag),
@@ -696,19 +685,19 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
                    attrs->valid ? 'V' : '-');
 
         /*
-         * Handle VIDMEM progamming. Currently uses a different scatter list
-         * format.
+         * For historical reasons these are separate, but soon these will be
+         * unified.
          */
         if (attrs->aperture == APERTURE_VIDMEM)
                 err = __nvgpu_gmmu_update_page_table_vidmem(vm,
-                                                            sgt,
+                                                            sgl,
                                                             space_to_skip,
                                                             virt_addr,
                                                             length,
                                                             attrs);
         else
                 err = __nvgpu_gmmu_update_page_table_sysmem(vm,
-                                                            sgt,
+                                                            sgl,
                                                             space_to_skip,
                                                             virt_addr,
                                                             length,
@@ -717,7 +706,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
         unmap_gmmu_pages(g, &vm->pdb);
         nvgpu_smp_mb();
 
-        __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");
+        __gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP");
 
         return err;
 }
@@ -736,7 +725,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
  */
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
                           u64 vaddr,
-                          struct sg_table *sgt,
+                          struct nvgpu_mem_sgl *sgl,
                           u64 buffer_offset,
                           u64 size,
                           int pgsz_idx,
@@ -785,7 +774,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
                 allocated = true;
         }
 
-        err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset,
+        err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset,
                                              vaddr, size, &attrs);
         if (err) {
                 nvgpu_err(g, "failed to update ptes on map");