diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/gmmu.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 109 |
1 files changed, 49 insertions, 60 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 7f486d68..41f5acdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -65,11 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
65 | struct gk20a *g = gk20a_from_vm(vm); | 65 | struct gk20a *g = gk20a_from_vm(vm); |
66 | u64 vaddr; | 66 | u64 vaddr; |
67 | 67 | ||
68 | struct sg_table *sgt = mem->priv.sgt; | 68 | struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem); |
69 | |||
70 | if (!sgl) | ||
71 | return -ENOMEM; | ||
69 | 72 | ||
70 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | 73 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); |
71 | vaddr = g->ops.mm.gmmu_map(vm, addr, | 74 | vaddr = g->ops.mm.gmmu_map(vm, addr, |
72 | sgt, /* sg table */ | 75 | sgl, /* sg list */ |
73 | 0, /* sg offset */ | 76 | 0, /* sg offset */ |
74 | size, | 77 | size, |
75 | gmmu_page_size_kernel, | 78 | gmmu_page_size_kernel, |
@@ -82,8 +85,11 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
82 | NULL, /* mapping_batch handle */ | 85 | NULL, /* mapping_batch handle */ |
83 | aperture); | 86 | aperture); |
84 | nvgpu_mutex_release(&vm->update_gmmu_lock); | 87 | nvgpu_mutex_release(&vm->update_gmmu_lock); |
88 | |||
89 | nvgpu_mem_sgl_free(g, sgl); | ||
90 | |||
85 | if (!vaddr) { | 91 | if (!vaddr) { |
86 | nvgpu_err(g, "failed to allocate va space"); | 92 | nvgpu_err(g, "failed to map buffer!"); |
87 | return 0; | 93 | return 0; |
88 | } | 94 | } |
89 | 95 | ||
@@ -91,7 +97,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
91 | } | 97 | } |
92 | 98 | ||
93 | /* | 99 | /* |
94 | * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings. | 100 | * Map a nvgpu_mem into the GMMU. This is for kernel space to use. |
95 | */ | 101 | */ |
96 | u64 nvgpu_gmmu_map(struct vm_gk20a *vm, | 102 | u64 nvgpu_gmmu_map(struct vm_gk20a *vm, |
97 | struct nvgpu_mem *mem, | 103 | struct nvgpu_mem *mem, |
@@ -106,7 +112,7 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
106 | } | 112 | } |
107 | 113 | ||
108 | /* | 114 | /* |
109 | * Like nvgpu_gmmu_map() except it can work on a fixed address instead. | 115 | * Like nvgpu_gmmu_map() except this can work on a fixed address. |
110 | */ | 116 | */ |
111 | u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm, | 117 | u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm, |
112 | struct nvgpu_mem *mem, | 118 | struct nvgpu_mem *mem, |
@@ -407,7 +413,7 @@ static int __set_pd_level(struct vm_gk20a *vm, | |||
407 | */ | 413 | */ |
408 | target_addr = next_pd ? | 414 | target_addr = next_pd ? |
409 | nvgpu_pde_phys_addr(g, next_pd) : | 415 | nvgpu_pde_phys_addr(g, next_pd) : |
410 | g->ops.mm.gpu_phys_addr(g, attrs, phys_addr); | 416 | phys_addr; |
411 | 417 | ||
412 | l->update_entry(vm, l, | 418 | l->update_entry(vm, l, |
413 | pd, pd_idx, | 419 | pd, pd_idx, |
@@ -458,18 +464,16 @@ static int __set_pd_level(struct vm_gk20a *vm, | |||
458 | * VIDMEM version of the update_ptes logic. | 464 | * VIDMEM version of the update_ptes logic. |
459 | */ | 465 | */ |
460 | static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | 466 | static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, |
461 | struct sg_table *sgt, | 467 | struct nvgpu_mem_sgl *sgl, |
462 | u64 space_to_skip, | 468 | u64 space_to_skip, |
463 | u64 virt_addr, | 469 | u64 virt_addr, |
464 | u64 length, | 470 | u64 length, |
465 | struct nvgpu_gmmu_attrs *attrs) | 471 | struct nvgpu_gmmu_attrs *attrs) |
466 | { | 472 | { |
467 | struct nvgpu_page_alloc *alloc = NULL; | ||
468 | struct page_alloc_chunk *chunk = NULL; | ||
469 | u64 phys_addr, chunk_length; | 473 | u64 phys_addr, chunk_length; |
470 | int err = 0; | 474 | int err = 0; |
471 | 475 | ||
472 | if (!sgt) { | 476 | if (!sgl) { |
473 | /* | 477 | /* |
474 | * This is considered an unmap. Just pass in 0 as the physical | 478 | * This is considered an unmap. Just pass in 0 as the physical |
475 | * address for the entire GPU range. | 479 | * address for the entire GPU range. |
@@ -482,22 +486,21 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | |||
482 | return err; | 486 | return err; |
483 | } | 487 | } |
484 | 488 | ||
485 | alloc = get_vidmem_page_alloc(sgt->sgl); | ||
486 | |||
487 | /* | 489 | /* |
488 | * Otherwise iterate across all the chunks in this allocation and | 490 | * Otherwise iterate across all the chunks in this allocation and |
489 | * map them. | 491 | * map them. |
490 | */ | 492 | */ |
491 | nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, | 493 | while (sgl) { |
492 | page_alloc_chunk, list_entry) { | ||
493 | if (space_to_skip && | 494 | if (space_to_skip && |
494 | space_to_skip >= chunk->length) { | 495 | space_to_skip >= nvgpu_mem_sgl_length(sgl)) { |
495 | space_to_skip -= chunk->length; | 496 | space_to_skip -= nvgpu_mem_sgl_length(sgl); |
497 | sgl = nvgpu_mem_sgl_next(sgl); | ||
496 | continue; | 498 | continue; |
497 | } | 499 | } |
498 | 500 | ||
499 | phys_addr = chunk->base + space_to_skip; | 501 | phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; |
500 | chunk_length = min(length, (chunk->length - space_to_skip)); | 502 | chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) - |
503 | space_to_skip)); | ||
501 | 504 | ||
502 | err = __set_pd_level(vm, &vm->pdb, | 505 | err = __set_pd_level(vm, &vm->pdb, |
503 | 0, | 506 | 0, |
@@ -518,23 +521,24 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | |||
518 | 521 | ||
519 | if (length == 0) | 522 | if (length == 0) |
520 | break; | 523 | break; |
524 | |||
525 | sgl = nvgpu_mem_sgl_next(sgl); | ||
521 | } | 526 | } |
522 | 527 | ||
523 | return err; | 528 | return err; |
524 | } | 529 | } |
525 | 530 | ||
526 | static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | 531 | static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, |
527 | struct sg_table *sgt, | 532 | struct nvgpu_mem_sgl *sgl, |
528 | u64 space_to_skip, | 533 | u64 space_to_skip, |
529 | u64 virt_addr, | 534 | u64 virt_addr, |
530 | u64 length, | 535 | u64 length, |
531 | struct nvgpu_gmmu_attrs *attrs) | 536 | struct nvgpu_gmmu_attrs *attrs) |
532 | { | 537 | { |
533 | int err; | 538 | int err; |
534 | struct scatterlist *sgl; | ||
535 | struct gk20a *g = gk20a_from_vm(vm); | 539 | struct gk20a *g = gk20a_from_vm(vm); |
536 | 540 | ||
537 | if (!sgt) { | 541 | if (!sgl) { |
538 | /* | 542 | /* |
539 | * This is considered an unmap. Just pass in 0 as the physical | 543 | * This is considered an unmap. Just pass in 0 as the physical |
540 | * address for the entire GPU range. | 544 | * address for the entire GPU range. |
@@ -548,19 +552,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
548 | } | 552 | } |
549 | 553 | ||
550 | /* | 554 | /* |
551 | * At this point we have a Linux scatter-gather list pointing to some | 555 | * At this point we have a scatter-gather list pointing to some number |
552 | * number of discontiguous chunks of memory. Iterate over that list and | 556 | * of discontiguous chunks of memory. We must iterate over that list and |
553 | * generate a GMMU map call for each chunk. There are two possibilities: | 557 | * generate a GMMU map call for each chunk. There are two possibilities: |
554 | * either the IOMMU is enabled or not. When the IOMMU is enabled the | 558 | * either an IOMMU is enabled or not. When an IOMMU is enabled the |
555 | * mapping is simple since the "physical" address is actually a virtual | 559 | * mapping is simple since the "physical" address is actually a virtual |
556 | * IO address and will be contiguous. The no-IOMMU case is more | 560 | * IO address and will be contiguous. |
557 | * complicated. We will have to iterate over the SGT and do a separate | ||
558 | * map for each chunk of the SGT. | ||
559 | */ | 561 | */ |
560 | sgl = sgt->sgl; | ||
561 | |||
562 | if (!g->mm.bypass_smmu) { | 562 | if (!g->mm.bypass_smmu) { |
563 | u64 io_addr = nvgpu_mem_get_addr_sgl(g, sgl); | 563 | u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs); |
564 | 564 | ||
565 | io_addr += space_to_skip; | 565 | io_addr += space_to_skip; |
566 | 566 | ||
@@ -585,14 +585,16 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
585 | /* | 585 | /* |
586 | * Cut out sgl ents for space_to_skip. | 586 | * Cut out sgl ents for space_to_skip. |
587 | */ | 587 | */ |
588 | if (space_to_skip && space_to_skip >= sgl->length) { | 588 | if (space_to_skip && |
589 | space_to_skip -= sgl->length; | 589 | space_to_skip >= nvgpu_mem_sgl_length(sgl)) { |
590 | sgl = sg_next(sgl); | 590 | space_to_skip -= nvgpu_mem_sgl_length(sgl); |
591 | sgl = nvgpu_mem_sgl_next(sgl); | ||
591 | continue; | 592 | continue; |
592 | } | 593 | } |
593 | 594 | ||
594 | phys_addr = sg_phys(sgl) + space_to_skip; | 595 | phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; |
595 | chunk_length = min(length, sgl->length - space_to_skip); | 596 | chunk_length = min(length, |
597 | nvgpu_mem_sgl_length(sgl) - space_to_skip); | ||
596 | 598 | ||
597 | err = __set_pd_level(vm, &vm->pdb, | 599 | err = __set_pd_level(vm, &vm->pdb, |
598 | 0, | 600 | 0, |
@@ -600,13 +602,11 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
600 | virt_addr, | 602 | virt_addr, |
601 | chunk_length, | 603 | chunk_length, |
602 | attrs); | 604 | attrs); |
603 | if (err) | ||
604 | return err; | ||
605 | 605 | ||
606 | space_to_skip = 0; | 606 | space_to_skip = 0; |
607 | virt_addr += chunk_length; | 607 | virt_addr += chunk_length; |
608 | length -= chunk_length; | 608 | length -= chunk_length; |
609 | sgl = sg_next(sgl); | 609 | sgl = nvgpu_mem_sgl_next(sgl); |
610 | 610 | ||
611 | if (length == 0) | 611 | if (length == 0) |
612 | break; | 612 | break; |
@@ -624,22 +624,20 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
624 | * implementations. But the logic around that is generic to all chips. Every | 624 | * implementations. But the logic around that is generic to all chips. Every |
625 | * chip has some number of PDE levels and then a PTE level. | 625 | * chip has some number of PDE levels and then a PTE level. |
626 | * | 626 | * |
627 | * Each chunk of the incoming SGT is sent to the chip specific implementation | 627 | * Each chunk of the incoming SGL is sent to the chip specific implementation |
628 | * of page table update. | 628 | * of page table update. |
629 | * | 629 | * |
630 | * [*] Note: the "physical" address may actually be an IO virtual address in the | 630 | * [*] Note: the "physical" address may actually be an IO virtual address in the |
631 | * case of SMMU usage. | 631 | * case of SMMU usage. |
632 | */ | 632 | */ |
633 | static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | 633 | static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, |
634 | struct sg_table *sgt, | 634 | struct nvgpu_mem_sgl *sgl, |
635 | u64 space_to_skip, | 635 | u64 space_to_skip, |
636 | u64 virt_addr, | 636 | u64 virt_addr, |
637 | u64 length, | 637 | u64 length, |
638 | struct nvgpu_gmmu_attrs *attrs) | 638 | struct nvgpu_gmmu_attrs *attrs) |
639 | { | 639 | { |
640 | struct gk20a *g = gk20a_from_vm(vm); | 640 | struct gk20a *g = gk20a_from_vm(vm); |
641 | struct nvgpu_page_alloc *alloc; | ||
642 | u64 phys_addr = 0; | ||
643 | u32 page_size; | 641 | u32 page_size; |
644 | int err; | 642 | int err; |
645 | 643 | ||
@@ -665,25 +663,16 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
665 | return err; | 663 | return err; |
666 | } | 664 | } |
667 | 665 | ||
668 | if (sgt) { | ||
669 | if (attrs->aperture == APERTURE_VIDMEM) { | ||
670 | alloc = get_vidmem_page_alloc(sgt->sgl); | ||
671 | |||
672 | phys_addr = alloc->base; | ||
673 | } else | ||
674 | phys_addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl); | ||
675 | } | ||
676 | |||
677 | __gmmu_dbg(g, attrs, | 666 | __gmmu_dbg(g, attrs, |
678 | "vm=%s " | 667 | "vm=%s " |
679 | "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx " | 668 | "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx " |
680 | "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " | 669 | "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " |
681 | "kind=%#02x APT=%-6s %c%c%c%c%c", | 670 | "kind=%#02x APT=%-6s %c%c%c%c%c", |
682 | vm->name, | 671 | vm->name, |
683 | sgt ? "MAP" : "UNMAP", | 672 | sgl ? "MAP" : "UNMAP", |
684 | virt_addr, | 673 | virt_addr, |
685 | length, | 674 | length, |
686 | phys_addr, | 675 | sgl ? nvgpu_mem_sgl_phys(sgl) : 0, |
687 | space_to_skip, | 676 | space_to_skip, |
688 | page_size >> 10, | 677 | page_size >> 10, |
689 | nvgpu_gmmu_perm_str(attrs->rw_flag), | 678 | nvgpu_gmmu_perm_str(attrs->rw_flag), |
@@ -696,19 +685,19 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
696 | attrs->valid ? 'V' : '-'); | 685 | attrs->valid ? 'V' : '-'); |
697 | 686 | ||
698 | /* | 687 | /* |
699 | * Handle VIDMEM progamming. Currently uses a different scatter list | 688 | * For historical reasons these are separate, but soon these will be |
700 | * format. | 689 | * unified. |
701 | */ | 690 | */ |
702 | if (attrs->aperture == APERTURE_VIDMEM) | 691 | if (attrs->aperture == APERTURE_VIDMEM) |
703 | err = __nvgpu_gmmu_update_page_table_vidmem(vm, | 692 | err = __nvgpu_gmmu_update_page_table_vidmem(vm, |
704 | sgt, | 693 | sgl, |
705 | space_to_skip, | 694 | space_to_skip, |
706 | virt_addr, | 695 | virt_addr, |
707 | length, | 696 | length, |
708 | attrs); | 697 | attrs); |
709 | else | 698 | else |
710 | err = __nvgpu_gmmu_update_page_table_sysmem(vm, | 699 | err = __nvgpu_gmmu_update_page_table_sysmem(vm, |
711 | sgt, | 700 | sgl, |
712 | space_to_skip, | 701 | space_to_skip, |
713 | virt_addr, | 702 | virt_addr, |
714 | length, | 703 | length, |
@@ -717,7 +706,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
717 | unmap_gmmu_pages(g, &vm->pdb); | 706 | unmap_gmmu_pages(g, &vm->pdb); |
718 | nvgpu_smp_mb(); | 707 | nvgpu_smp_mb(); |
719 | 708 | ||
720 | __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP"); | 709 | __gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP"); |
721 | 710 | ||
722 | return err; | 711 | return err; |
723 | } | 712 | } |
@@ -736,7 +725,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
736 | */ | 725 | */ |
737 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | 726 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, |
738 | u64 vaddr, | 727 | u64 vaddr, |
739 | struct sg_table *sgt, | 728 | struct nvgpu_mem_sgl *sgl, |
740 | u64 buffer_offset, | 729 | u64 buffer_offset, |
741 | u64 size, | 730 | u64 size, |
742 | int pgsz_idx, | 731 | int pgsz_idx, |
@@ -785,7 +774,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
785 | allocated = true; | 774 | allocated = true; |
786 | } | 775 | } |
787 | 776 | ||
788 | err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset, | 777 | err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset, |
789 | vaddr, size, &attrs); | 778 | vaddr, size, &attrs); |
790 | if (err) { | 779 | if (err) { |
791 | nvgpu_err(g, "failed to update ptes on map"); | 780 | nvgpu_err(g, "failed to update ptes on map"); |