author | Alex Waterman <alexw@nvidia.com> | 2017-05-25 19:56:50 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-09-22 15:52:48 -0400 |
commit | 0090ee5aca268a3c359f34c74b8c521df3bd8593 (patch) | |
tree | 2779dc64554cdb38b717ce09c0e3dcbf36107ed3 /drivers/gpu/nvgpu/common/mm | |
parent | e32cc0108cf2ef5de7a17f0f6c0aa9af7faf23ed (diff) |
gpu: nvgpu: nvgpu SGL implementation
The last major item preventing the core MM code in the nvgpu
driver from being platform agnostic is the usage of Linux
scatter-gather tables and scatter-gather lists. These data
structures are used throughout the mapping code to handle
discontiguous DMA allocations and are also overloaded to
represent VIDMEM allocs.
The notion of a scatter-gather table is crucial to a HW device
that can handle discontiguous DMA. The GPU has an MMU which
allows the GPU to do page gathering and present a virtually
contiguous buffer to the GPU HW. As a result it makes sense
for the GPU driver to use some sort of scatter-gather concept
to maximize memory usage efficiency.
To that end, this patch keeps the notion of a scatter-gather
list but implements it in the nvgpu common code. It is based
heavily on the Linux SGL concept. It is a singly linked list
of blocks, each representing a chunk of memory. To map or
use a DMA allocation, SW must iterate over each block in the
SGL.
This patch implements the most basic level of support for this
data structure. There are certainly easy optimizations that
could be done to speed up the current implementation. However,
this patch's goal is simply to divest the core MM code of any
last Linux-isms. Speed and efficiency come next.
Change-Id: Icf44641db22d87fa1d003debbd9f71b605258e42
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1530867
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
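For orientation before reading the diff: a minimal sketch of the node the new SGL is built from. The field names and types are inferred from the accessors in the new nvgpu_mem.c and the assignments in page_allocator.c in this change; the authoritative declaration lives in a header (presumably <nvgpu/nvgpu_mem.h>) that is outside this diffstat, so treat the layout as an approximation.

```c
/*
 * Approximate shape of the new SGL node, inferred from the accessors
 * (nvgpu_mem_sgl_next/phys/dma/length) and the assignments in the
 * page allocator below. Not copied from the actual header.
 */
struct nvgpu_mem_sgl {
	struct nvgpu_mem_sgl *next;   /* singly linked list of chunks */
	u64 phys;                     /* physical base of this chunk */
	u64 dma;                      /* DMA/IOVA address, 0 if not IOMMU-mapped */
	u64 length;                   /* chunk length in bytes */
};
```

The mapping code consumes an allocation by walking this list chunk by chunk, as the gmmu.c changes below show.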
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 109
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 73
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/page_allocator.c | 142
3 files changed, 198 insertions, 126 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 7f486d68..41f5acdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -65,11 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
65 | struct gk20a *g = gk20a_from_vm(vm); | 65 | struct gk20a *g = gk20a_from_vm(vm); |
66 | u64 vaddr; | 66 | u64 vaddr; |
67 | 67 | ||
68 | struct sg_table *sgt = mem->priv.sgt; | 68 | struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem); |
69 | |||
70 | if (!sgl) | ||
71 | return -ENOMEM; | ||
69 | 72 | ||
70 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | 73 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); |
71 | vaddr = g->ops.mm.gmmu_map(vm, addr, | 74 | vaddr = g->ops.mm.gmmu_map(vm, addr, |
72 | sgt, /* sg table */ | 75 | sgl, /* sg list */ |
73 | 0, /* sg offset */ | 76 | 0, /* sg offset */ |
74 | size, | 77 | size, |
75 | gmmu_page_size_kernel, | 78 | gmmu_page_size_kernel, |
@@ -82,8 +85,11 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
82 | NULL, /* mapping_batch handle */ | 85 | NULL, /* mapping_batch handle */ |
83 | aperture); | 86 | aperture); |
84 | nvgpu_mutex_release(&vm->update_gmmu_lock); | 87 | nvgpu_mutex_release(&vm->update_gmmu_lock); |
88 | |||
89 | nvgpu_mem_sgl_free(g, sgl); | ||
90 | |||
85 | if (!vaddr) { | 91 | if (!vaddr) { |
86 | nvgpu_err(g, "failed to allocate va space"); | 92 | nvgpu_err(g, "failed to map buffer!"); |
87 | return 0; | 93 | return 0; |
88 | } | 94 | } |
89 | 95 | ||
@@ -91,7 +97,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
91 | } | 97 | } |
92 | 98 | ||
93 | /* | 99 | /* |
94 | * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings. | 100 | * Map a nvgpu_mem into the GMMU. This is for kernel space to use. |
95 | */ | 101 | */ |
96 | u64 nvgpu_gmmu_map(struct vm_gk20a *vm, | 102 | u64 nvgpu_gmmu_map(struct vm_gk20a *vm, |
97 | struct nvgpu_mem *mem, | 103 | struct nvgpu_mem *mem, |
@@ -106,7 +112,7 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
106 | } | 112 | } |
107 | 113 | ||
108 | /* | 114 | /* |
109 | * Like nvgpu_gmmu_map() except it can work on a fixed address instead. | 115 | * Like nvgpu_gmmu_map() except this can work on a fixed address. |
110 | */ | 116 | */ |
111 | u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm, | 117 | u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm, |
112 | struct nvgpu_mem *mem, | 118 | struct nvgpu_mem *mem, |
@@ -407,7 +413,7 @@ static int __set_pd_level(struct vm_gk20a *vm, | |||
407 | */ | 413 | */ |
408 | target_addr = next_pd ? | 414 | target_addr = next_pd ? |
409 | nvgpu_pde_phys_addr(g, next_pd) : | 415 | nvgpu_pde_phys_addr(g, next_pd) : |
410 | g->ops.mm.gpu_phys_addr(g, attrs, phys_addr); | 416 | phys_addr; |
411 | 417 | ||
412 | l->update_entry(vm, l, | 418 | l->update_entry(vm, l, |
413 | pd, pd_idx, | 419 | pd, pd_idx, |
@@ -458,18 +464,16 @@ static int __set_pd_level(struct vm_gk20a *vm, | |||
458 | * VIDMEM version of the update_ptes logic. | 464 | * VIDMEM version of the update_ptes logic. |
459 | */ | 465 | */ |
460 | static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | 466 | static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, |
461 | struct sg_table *sgt, | 467 | struct nvgpu_mem_sgl *sgl, |
462 | u64 space_to_skip, | 468 | u64 space_to_skip, |
463 | u64 virt_addr, | 469 | u64 virt_addr, |
464 | u64 length, | 470 | u64 length, |
465 | struct nvgpu_gmmu_attrs *attrs) | 471 | struct nvgpu_gmmu_attrs *attrs) |
466 | { | 472 | { |
467 | struct nvgpu_page_alloc *alloc = NULL; | ||
468 | struct page_alloc_chunk *chunk = NULL; | ||
469 | u64 phys_addr, chunk_length; | 473 | u64 phys_addr, chunk_length; |
470 | int err = 0; | 474 | int err = 0; |
471 | 475 | ||
472 | if (!sgt) { | 476 | if (!sgl) { |
473 | /* | 477 | /* |
474 | * This is considered an unmap. Just pass in 0 as the physical | 478 | * This is considered an unmap. Just pass in 0 as the physical |
475 | * address for the entire GPU range. | 479 | * address for the entire GPU range. |
@@ -482,22 +486,21 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | |||
482 | return err; | 486 | return err; |
483 | } | 487 | } |
484 | 488 | ||
485 | alloc = get_vidmem_page_alloc(sgt->sgl); | ||
486 | |||
487 | /* | 489 | /* |
488 | * Otherwise iterate across all the chunks in this allocation and | 490 | * Otherwise iterate across all the chunks in this allocation and |
489 | * map them. | 491 | * map them. |
490 | */ | 492 | */ |
491 | nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, | 493 | while (sgl) { |
492 | page_alloc_chunk, list_entry) { | ||
493 | if (space_to_skip && | 494 | if (space_to_skip && |
494 | space_to_skip >= chunk->length) { | 495 | space_to_skip >= nvgpu_mem_sgl_length(sgl)) { |
495 | space_to_skip -= chunk->length; | 496 | space_to_skip -= nvgpu_mem_sgl_length(sgl); |
497 | sgl = nvgpu_mem_sgl_next(sgl); | ||
496 | continue; | 498 | continue; |
497 | } | 499 | } |
498 | 500 | ||
499 | phys_addr = chunk->base + space_to_skip; | 501 | phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; |
500 | chunk_length = min(length, (chunk->length - space_to_skip)); | 502 | chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) - |
503 | space_to_skip)); | ||
501 | 504 | ||
502 | err = __set_pd_level(vm, &vm->pdb, | 505 | err = __set_pd_level(vm, &vm->pdb, |
503 | 0, | 506 | 0, |
@@ -518,23 +521,24 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | |||
518 | 521 | ||
519 | if (length == 0) | 522 | if (length == 0) |
520 | break; | 523 | break; |
524 | |||
525 | sgl = nvgpu_mem_sgl_next(sgl); | ||
521 | } | 526 | } |
522 | 527 | ||
523 | return err; | 528 | return err; |
524 | } | 529 | } |
525 | 530 | ||
526 | static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | 531 | static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, |
527 | struct sg_table *sgt, | 532 | struct nvgpu_mem_sgl *sgl, |
528 | u64 space_to_skip, | 533 | u64 space_to_skip, |
529 | u64 virt_addr, | 534 | u64 virt_addr, |
530 | u64 length, | 535 | u64 length, |
531 | struct nvgpu_gmmu_attrs *attrs) | 536 | struct nvgpu_gmmu_attrs *attrs) |
532 | { | 537 | { |
533 | int err; | 538 | int err; |
534 | struct scatterlist *sgl; | ||
535 | struct gk20a *g = gk20a_from_vm(vm); | 539 | struct gk20a *g = gk20a_from_vm(vm); |
536 | 540 | ||
537 | if (!sgt) { | 541 | if (!sgl) { |
538 | /* | 542 | /* |
539 | * This is considered an unmap. Just pass in 0 as the physical | 543 | * This is considered an unmap. Just pass in 0 as the physical |
540 | * address for the entire GPU range. | 544 | * address for the entire GPU range. |
@@ -548,19 +552,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
548 | } | 552 | } |
549 | 553 | ||
550 | /* | 554 | /* |
551 | * At this point we have a Linux scatter-gather list pointing to some | 555 | * At this point we have a scatter-gather list pointing to some number |
552 | * number of discontiguous chunks of memory. Iterate over that list and | 556 | * of discontiguous chunks of memory. We must iterate over that list and |
553 | * generate a GMMU map call for each chunk. There are two possibilities: | 557 | * generate a GMMU map call for each chunk. There are two possibilities: |
554 | * either the IOMMU is enabled or not. When the IOMMU is enabled the | 558 | * either an IOMMU is enabled or not. When an IOMMU is enabled the |
555 | * mapping is simple since the "physical" address is actually a virtual | 559 | * mapping is simple since the "physical" address is actually a virtual |
556 | * IO address and will be contiguous. The no-IOMMU case is more | 560 | * IO address and will be contiguous. |
557 | * complicated. We will have to iterate over the SGT and do a separate | ||
558 | * map for each chunk of the SGT. | ||
559 | */ | 561 | */ |
560 | sgl = sgt->sgl; | ||
561 | |||
562 | if (!g->mm.bypass_smmu) { | 562 | if (!g->mm.bypass_smmu) { |
563 | u64 io_addr = nvgpu_mem_get_addr_sgl(g, sgl); | 563 | u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs); |
564 | 564 | ||
565 | io_addr += space_to_skip; | 565 | io_addr += space_to_skip; |
566 | 566 | ||
@@ -585,14 +585,16 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
585 | /* | 585 | /* |
586 | * Cut out sgl ents for space_to_skip. | 586 | * Cut out sgl ents for space_to_skip. |
587 | */ | 587 | */ |
588 | if (space_to_skip && space_to_skip >= sgl->length) { | 588 | if (space_to_skip && |
589 | space_to_skip -= sgl->length; | 589 | space_to_skip >= nvgpu_mem_sgl_length(sgl)) { |
590 | sgl = sg_next(sgl); | 590 | space_to_skip -= nvgpu_mem_sgl_length(sgl); |
591 | sgl = nvgpu_mem_sgl_next(sgl); | ||
591 | continue; | 592 | continue; |
592 | } | 593 | } |
593 | 594 | ||
594 | phys_addr = sg_phys(sgl) + space_to_skip; | 595 | phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; |
595 | chunk_length = min(length, sgl->length - space_to_skip); | 596 | chunk_length = min(length, |
597 | nvgpu_mem_sgl_length(sgl) - space_to_skip); | ||
596 | 598 | ||
597 | err = __set_pd_level(vm, &vm->pdb, | 599 | err = __set_pd_level(vm, &vm->pdb, |
598 | 0, | 600 | 0, |
@@ -600,13 +602,11 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
600 | virt_addr, | 602 | virt_addr, |
601 | chunk_length, | 603 | chunk_length, |
602 | attrs); | 604 | attrs); |
603 | if (err) | ||
604 | return err; | ||
605 | 605 | ||
606 | space_to_skip = 0; | 606 | space_to_skip = 0; |
607 | virt_addr += chunk_length; | 607 | virt_addr += chunk_length; |
608 | length -= chunk_length; | 608 | length -= chunk_length; |
609 | sgl = sg_next(sgl); | 609 | sgl = nvgpu_mem_sgl_next(sgl); |
610 | 610 | ||
611 | if (length == 0) | 611 | if (length == 0) |
612 | break; | 612 | break; |
@@ -624,22 +624,20 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
624 | * implementations. But the logic around that is generic to all chips. Every | 624 | * implementations. But the logic around that is generic to all chips. Every |
625 | * chip has some number of PDE levels and then a PTE level. | 625 | * chip has some number of PDE levels and then a PTE level. |
626 | * | 626 | * |
627 | * Each chunk of the incoming SGT is sent to the chip specific implementation | 627 | * Each chunk of the incoming SGL is sent to the chip specific implementation |
628 | * of page table update. | 628 | * of page table update. |
629 | * | 629 | * |
630 | * [*] Note: the "physical" address may actually be an IO virtual address in the | 630 | * [*] Note: the "physical" address may actually be an IO virtual address in the |
631 | * case of SMMU usage. | 631 | * case of SMMU usage. |
632 | */ | 632 | */ |
633 | static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | 633 | static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, |
634 | struct sg_table *sgt, | 634 | struct nvgpu_mem_sgl *sgl, |
635 | u64 space_to_skip, | 635 | u64 space_to_skip, |
636 | u64 virt_addr, | 636 | u64 virt_addr, |
637 | u64 length, | 637 | u64 length, |
638 | struct nvgpu_gmmu_attrs *attrs) | 638 | struct nvgpu_gmmu_attrs *attrs) |
639 | { | 639 | { |
640 | struct gk20a *g = gk20a_from_vm(vm); | 640 | struct gk20a *g = gk20a_from_vm(vm); |
641 | struct nvgpu_page_alloc *alloc; | ||
642 | u64 phys_addr = 0; | ||
643 | u32 page_size; | 641 | u32 page_size; |
644 | int err; | 642 | int err; |
645 | 643 | ||
@@ -665,25 +663,16 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
665 | return err; | 663 | return err; |
666 | } | 664 | } |
667 | 665 | ||
668 | if (sgt) { | ||
669 | if (attrs->aperture == APERTURE_VIDMEM) { | ||
670 | alloc = get_vidmem_page_alloc(sgt->sgl); | ||
671 | |||
672 | phys_addr = alloc->base; | ||
673 | } else | ||
674 | phys_addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl); | ||
675 | } | ||
676 | |||
677 | __gmmu_dbg(g, attrs, | 666 | __gmmu_dbg(g, attrs, |
678 | "vm=%s " | 667 | "vm=%s " |
679 | "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx " | 668 | "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx " |
680 | "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " | 669 | "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " |
681 | "kind=%#02x APT=%-6s %c%c%c%c%c", | 670 | "kind=%#02x APT=%-6s %c%c%c%c%c", |
682 | vm->name, | 671 | vm->name, |
683 | sgt ? "MAP" : "UNMAP", | 672 | sgl ? "MAP" : "UNMAP", |
684 | virt_addr, | 673 | virt_addr, |
685 | length, | 674 | length, |
686 | phys_addr, | 675 | sgl ? nvgpu_mem_sgl_phys(sgl) : 0, |
687 | space_to_skip, | 676 | space_to_skip, |
688 | page_size >> 10, | 677 | page_size >> 10, |
689 | nvgpu_gmmu_perm_str(attrs->rw_flag), | 678 | nvgpu_gmmu_perm_str(attrs->rw_flag), |
@@ -696,19 +685,19 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
696 | attrs->valid ? 'V' : '-'); | 685 | attrs->valid ? 'V' : '-'); |
697 | 686 | ||
698 | /* | 687 | /* |
699 | * Handle VIDMEM progamming. Currently uses a different scatter list | 688 | * For historical reasons these are separate, but soon these will be |
700 | * format. | 689 | * unified. |
701 | */ | 690 | */ |
702 | if (attrs->aperture == APERTURE_VIDMEM) | 691 | if (attrs->aperture == APERTURE_VIDMEM) |
703 | err = __nvgpu_gmmu_update_page_table_vidmem(vm, | 692 | err = __nvgpu_gmmu_update_page_table_vidmem(vm, |
704 | sgt, | 693 | sgl, |
705 | space_to_skip, | 694 | space_to_skip, |
706 | virt_addr, | 695 | virt_addr, |
707 | length, | 696 | length, |
708 | attrs); | 697 | attrs); |
709 | else | 698 | else |
710 | err = __nvgpu_gmmu_update_page_table_sysmem(vm, | 699 | err = __nvgpu_gmmu_update_page_table_sysmem(vm, |
711 | sgt, | 700 | sgl, |
712 | space_to_skip, | 701 | space_to_skip, |
713 | virt_addr, | 702 | virt_addr, |
714 | length, | 703 | length, |
@@ -717,7 +706,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
717 | unmap_gmmu_pages(g, &vm->pdb); | 706 | unmap_gmmu_pages(g, &vm->pdb); |
718 | nvgpu_smp_mb(); | 707 | nvgpu_smp_mb(); |
719 | 708 | ||
720 | __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP"); | 709 | __gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP"); |
721 | 710 | ||
722 | return err; | 711 | return err; |
723 | } | 712 | } |
@@ -736,7 +725,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
736 | */ | 725 | */ |
737 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | 726 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, |
738 | u64 vaddr, | 727 | u64 vaddr, |
739 | struct sg_table *sgt, | 728 | struct nvgpu_mem_sgl *sgl, |
740 | u64 buffer_offset, | 729 | u64 buffer_offset, |
741 | u64 size, | 730 | u64 size, |
742 | int pgsz_idx, | 731 | int pgsz_idx, |
@@ -785,7 +774,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
785 | allocated = true; | 774 | allocated = true; |
786 | } | 775 | } |
787 | 776 | ||
788 | err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset, | 777 | err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset, |
789 | vaddr, size, &attrs); | 778 | vaddr, size, &attrs); |
790 | if (err) { | 779 | if (err) { |
791 | nvgpu_err(g, "failed to update ptes on map"); | 780 | nvgpu_err(g, "failed to update ptes on map"); |
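Both __nvgpu_gmmu_update_page_table_vidmem() and __nvgpu_gmmu_update_page_table_sysmem() above now share the same chunk walk. Below is a condensed sketch of that loop, not the literal driver code: program_chunk() is a hypothetical stand-in for the __set_pd_level() call and its attribute plumbing.

```c
/*
 * Condensed sketch of the per-chunk walk shared by the VIDMEM and
 * SYSMEM update paths: consume space_to_skip across whole leading
 * chunks, then program PTEs chunk by chunk until the requested length
 * is exhausted. program_chunk() is a placeholder for __set_pd_level().
 */
static void walk_and_map(struct nvgpu_mem_sgl *sgl, u64 space_to_skip,
			 u64 virt_addr, u64 length)
{
	while (sgl) {
		u64 sgl_len = nvgpu_mem_sgl_length(sgl);
		u64 phys_addr, chunk_length;

		if (space_to_skip && space_to_skip >= sgl_len) {
			/* Chunk lies entirely before the region to map. */
			space_to_skip -= sgl_len;
			sgl = nvgpu_mem_sgl_next(sgl);
			continue;
		}

		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
		chunk_length = min(length, sgl_len - space_to_skip);

		program_chunk(phys_addr, virt_addr, chunk_length);

		space_to_skip = 0;	/* only the first mapped chunk is offset */
		virt_addr += chunk_length;
		length -= chunk_length;

		if (length == 0)
			break;

		sgl = nvgpu_mem_sgl_next(sgl);
	}
}
```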
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
new file mode 100644
index 00000000..7296c673
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -0,0 +1,73 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/kmem.h> | ||
18 | #include <nvgpu/nvgpu_mem.h> | ||
19 | |||
20 | #include "gk20a/gk20a.h" | ||
21 | |||
22 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl) | ||
23 | { | ||
24 | return sgl->next; | ||
25 | } | ||
26 | |||
27 | u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl) | ||
28 | { | ||
29 | return sgl->phys; | ||
30 | } | ||
31 | |||
32 | u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl) | ||
33 | { | ||
34 | return sgl->dma; | ||
35 | } | ||
36 | |||
37 | u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl) | ||
38 | { | ||
39 | return sgl->length; | ||
40 | } | ||
41 | |||
42 | /* | ||
43 | * This builds a GPU address for the %sgl based on whether an IOMMU is present | ||
44 | * or not. It also handles turning the physical address into the true GPU | ||
45 | * physical address that should be programmed into the page tables. | ||
46 | */ | ||
47 | u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl, | ||
48 | struct nvgpu_gmmu_attrs *attrs) | ||
49 | { | ||
50 | if (nvgpu_mem_sgl_dma(sgl) == 0) | ||
51 | return g->ops.mm.gpu_phys_addr(g, attrs, | ||
52 | nvgpu_mem_sgl_phys(sgl)); | ||
53 | |||
54 | if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE) | ||
55 | return 0; | ||
56 | |||
57 | return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl)); | ||
58 | } | ||
59 | |||
60 | void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl) | ||
61 | { | ||
62 | struct nvgpu_mem_sgl *next; | ||
63 | |||
64 | /* | ||
65 | * Free each of the elements. We expect each element to have been | ||
66 | * nvgpu_k[mz]alloc()ed. | ||
67 | */ | ||
68 | while (sgl) { | ||
69 | next = nvgpu_mem_sgl_next(sgl); | ||
70 | nvgpu_kfree(g, sgl); | ||
71 | sgl = next; | ||
72 | } | ||
73 | } | ||
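The three-way decision in nvgpu_mem_sgl_gpu_addr() above is what keeps the callers IOMMU-agnostic. An annotated restatement of those cases follows; it is illustration only and uses the same calls as the new file, not additional driver code.

```c
/* Illustration of the address selection in nvgpu_mem_sgl_gpu_addr(). */
u64 addr;

if (nvgpu_mem_sgl_dma(sgl) == 0)
	/*
	 * No DMA/IOVA mapping exists for this chunk: translate the raw
	 * physical address into a GPU-visible physical address.
	 */
	addr = g->ops.mm.gpu_phys_addr(g, attrs, nvgpu_mem_sgl_phys(sgl));
else if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE)
	/* The DMA mapping failed; there is nothing valid to program. */
	addr = 0;
else
	/* SMMU in use: convert the IOVA into the GPU's SMMU aperture. */
	addr = gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl));
```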
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 72ff8f2d..6d92b457 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -147,19 +147,16 @@ static void __nvgpu_free_pages(struct nvgpu_page_allocator *a, | |||
147 | struct nvgpu_page_alloc *alloc, | 147 | struct nvgpu_page_alloc *alloc, |
148 | bool free_buddy_alloc) | 148 | bool free_buddy_alloc) |
149 | { | 149 | { |
150 | struct page_alloc_chunk *chunk; | 150 | struct nvgpu_mem_sgl *sgl = alloc->sgl; |
151 | 151 | ||
152 | while (!nvgpu_list_empty(&alloc->alloc_chunks)) { | 152 | if (free_buddy_alloc) { |
153 | chunk = nvgpu_list_first_entry(&alloc->alloc_chunks, | 153 | while (sgl) { |
154 | page_alloc_chunk, | 154 | nvgpu_free(&a->source_allocator, sgl->phys); |
155 | list_entry); | 155 | sgl = nvgpu_mem_sgl_next(sgl); |
156 | nvgpu_list_del(&chunk->list_entry); | 156 | } |
157 | |||
158 | if (free_buddy_alloc) | ||
159 | nvgpu_free(&a->source_allocator, chunk->base); | ||
160 | nvgpu_kmem_cache_free(a->chunk_cache, chunk); | ||
161 | } | 157 | } |
162 | 158 | ||
159 | nvgpu_mem_sgl_free(a->owner->g, alloc->sgl); | ||
163 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); | 160 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); |
164 | } | 161 | } |
165 | 162 | ||
@@ -243,15 +240,14 @@ static void free_slab_page(struct nvgpu_page_allocator *a, | |||
243 | } | 240 | } |
244 | 241 | ||
245 | /* | 242 | /* |
246 | * This expects @alloc to have 1 empty page_alloc_chunk already added to the | 243 | * This expects @alloc to have 1 empty sgl_entry ready for usage. |
247 | * alloc_chunks list. | ||
248 | */ | 244 | */ |
249 | static int __do_slab_alloc(struct nvgpu_page_allocator *a, | 245 | static int __do_slab_alloc(struct nvgpu_page_allocator *a, |
250 | struct page_alloc_slab *slab, | 246 | struct page_alloc_slab *slab, |
251 | struct nvgpu_page_alloc *alloc) | 247 | struct nvgpu_page_alloc *alloc) |
252 | { | 248 | { |
253 | struct page_alloc_slab_page *slab_page = NULL; | 249 | struct page_alloc_slab_page *slab_page = NULL; |
254 | struct page_alloc_chunk *chunk; | 250 | struct nvgpu_mem_sgl *sgl; |
255 | unsigned long offs; | 251 | unsigned long offs; |
256 | 252 | ||
257 | /* | 253 | /* |
@@ -302,18 +298,19 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a, | |||
302 | BUG(); /* Should be impossible to hit this. */ | 298 | BUG(); /* Should be impossible to hit this. */ |
303 | 299 | ||
304 | /* | 300 | /* |
305 | * Handle building the nvgpu_page_alloc struct. We expect one | 301 | * Handle building the nvgpu_page_alloc struct. We expect one sgl |
306 | * page_alloc_chunk to be present. | 302 | * to be present. |
307 | */ | 303 | */ |
308 | alloc->slab_page = slab_page; | 304 | alloc->slab_page = slab_page; |
309 | alloc->nr_chunks = 1; | 305 | alloc->nr_chunks = 1; |
310 | alloc->length = slab_page->slab_size; | 306 | alloc->length = slab_page->slab_size; |
311 | alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); | 307 | alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); |
312 | 308 | ||
313 | chunk = nvgpu_list_first_entry(&alloc->alloc_chunks, | 309 | sgl = alloc->sgl; |
314 | page_alloc_chunk, list_entry); | 310 | sgl->phys = alloc->base; |
315 | chunk->base = alloc->base; | 311 | sgl->dma = alloc->base; |
316 | chunk->length = alloc->length; | 312 | sgl->length = alloc->length; |
313 | sgl->next = NULL; | ||
317 | 314 | ||
318 | return 0; | 315 | return 0; |
319 | } | 316 | } |
@@ -327,7 +324,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab( | |||
327 | int err, slab_nr; | 324 | int err, slab_nr; |
328 | struct page_alloc_slab *slab; | 325 | struct page_alloc_slab *slab; |
329 | struct nvgpu_page_alloc *alloc = NULL; | 326 | struct nvgpu_page_alloc *alloc = NULL; |
330 | struct page_alloc_chunk *chunk = NULL; | 327 | struct nvgpu_mem_sgl *sgl = NULL; |
331 | 328 | ||
332 | /* | 329 | /* |
333 | * Align the length to a page and then divide by the page size (4k for | 330 | * Align the length to a page and then divide by the page size (4k for |
@@ -341,15 +338,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab( | |||
341 | palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); | 338 | palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); |
342 | goto fail; | 339 | goto fail; |
343 | } | 340 | } |
344 | chunk = nvgpu_kmem_cache_alloc(a->chunk_cache); | 341 | sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); |
345 | if (!chunk) { | 342 | if (!sgl) { |
346 | palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n"); | 343 | palloc_dbg(a, "OOM: could not alloc sgl struct!\n"); |
347 | goto fail; | 344 | goto fail; |
348 | } | 345 | } |
349 | 346 | ||
350 | nvgpu_init_list_node(&alloc->alloc_chunks); | 347 | alloc->sgl = sgl; |
351 | nvgpu_list_add(&chunk->list_entry, &alloc->alloc_chunks); | ||
352 | |||
353 | err = __do_slab_alloc(a, slab, alloc); | 348 | err = __do_slab_alloc(a, slab, alloc); |
354 | if (err) | 349 | if (err) |
355 | goto fail; | 350 | goto fail; |
@@ -363,8 +358,8 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab( | |||
363 | fail: | 358 | fail: |
364 | if (alloc) | 359 | if (alloc) |
365 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); | 360 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); |
366 | if (chunk) | 361 | if (sgl) |
367 | nvgpu_kmem_cache_free(a->chunk_cache, chunk); | 362 | nvgpu_kfree(a->owner->g, sgl); |
368 | return NULL; | 363 | return NULL; |
369 | } | 364 | } |
370 | 365 | ||
@@ -426,7 +421,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( | |||
426 | struct nvgpu_page_allocator *a, u64 pages) | 421 | struct nvgpu_page_allocator *a, u64 pages) |
427 | { | 422 | { |
428 | struct nvgpu_page_alloc *alloc; | 423 | struct nvgpu_page_alloc *alloc; |
429 | struct page_alloc_chunk *c; | 424 | struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL; |
430 | u64 max_chunk_len = pages << a->page_shift; | 425 | u64 max_chunk_len = pages << a->page_shift; |
431 | int i = 0; | 426 | int i = 0; |
432 | 427 | ||
@@ -436,7 +431,6 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( | |||
436 | 431 | ||
437 | memset(alloc, 0, sizeof(*alloc)); | 432 | memset(alloc, 0, sizeof(*alloc)); |
438 | 433 | ||
439 | nvgpu_init_list_node(&alloc->alloc_chunks); | ||
440 | alloc->length = pages << a->page_shift; | 434 | alloc->length = pages << a->page_shift; |
441 | 435 | ||
442 | while (pages) { | 436 | while (pages) { |
@@ -482,36 +476,48 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( | |||
482 | goto fail_cleanup; | 476 | goto fail_cleanup; |
483 | } | 477 | } |
484 | 478 | ||
485 | c = nvgpu_kmem_cache_alloc(a->chunk_cache); | 479 | sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); |
486 | if (!c) { | 480 | if (!sgl) { |
487 | nvgpu_free(&a->source_allocator, chunk_addr); | 481 | nvgpu_free(&a->source_allocator, chunk_addr); |
488 | goto fail_cleanup; | 482 | goto fail_cleanup; |
489 | } | 483 | } |
490 | 484 | ||
491 | pages -= chunk_pages; | 485 | pages -= chunk_pages; |
492 | 486 | ||
493 | c->base = chunk_addr; | 487 | sgl->phys = chunk_addr; |
494 | c->length = chunk_len; | 488 | sgl->dma = chunk_addr; |
495 | nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks); | 489 | sgl->length = chunk_len; |
490 | |||
491 | /* | ||
492 | * Build the singly linked list with a head node that is part of | ||
493 | * the list. | ||
494 | */ | ||
495 | if (prev_sgl) | ||
496 | prev_sgl->next = sgl; | ||
497 | else | ||
498 | alloc->sgl = sgl; | ||
499 | |||
500 | prev_sgl = sgl; | ||
496 | 501 | ||
497 | i++; | 502 | i++; |
498 | } | 503 | } |
499 | 504 | ||
500 | alloc->nr_chunks = i; | 505 | alloc->nr_chunks = i; |
501 | c = nvgpu_list_first_entry(&alloc->alloc_chunks, | 506 | alloc->base = alloc->sgl->phys; |
502 | page_alloc_chunk, list_entry); | ||
503 | alloc->base = c->base; | ||
504 | 507 | ||
505 | return alloc; | 508 | return alloc; |
506 | 509 | ||
507 | fail_cleanup: | 510 | fail_cleanup: |
508 | while (!nvgpu_list_empty(&alloc->alloc_chunks)) { | 511 | sgl = alloc->sgl; |
509 | c = nvgpu_list_first_entry(&alloc->alloc_chunks, | 512 | while (sgl) { |
510 | page_alloc_chunk, list_entry); | 513 | struct nvgpu_mem_sgl *next = sgl->next; |
511 | nvgpu_list_del(&c->list_entry); | 514 | |
512 | nvgpu_free(&a->source_allocator, c->base); | 515 | nvgpu_free(&a->source_allocator, sgl->phys); |
513 | nvgpu_kmem_cache_free(a->chunk_cache, c); | 516 | nvgpu_kfree(a->owner->g, sgl); |
517 | |||
518 | sgl = next; | ||
514 | } | 519 | } |
520 | |||
515 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); | 521 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); |
516 | fail: | 522 | fail: |
517 | return NULL; | 523 | return NULL; |
@@ -521,7 +527,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages( | |||
521 | struct nvgpu_page_allocator *a, u64 len) | 527 | struct nvgpu_page_allocator *a, u64 len) |
522 | { | 528 | { |
523 | struct nvgpu_page_alloc *alloc = NULL; | 529 | struct nvgpu_page_alloc *alloc = NULL; |
524 | struct page_alloc_chunk *c; | 530 | struct nvgpu_mem_sgl *sgl; |
525 | u64 pages; | 531 | u64 pages; |
526 | int i = 0; | 532 | int i = 0; |
527 | 533 | ||
@@ -536,11 +542,15 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages( | |||
536 | 542 | ||
537 | palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", | 543 | palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", |
538 | pages << a->page_shift, pages, alloc->base); | 544 | pages << a->page_shift, pages, alloc->base); |
539 | nvgpu_list_for_each_entry(c, &alloc->alloc_chunks, | 545 | sgl = alloc->sgl; |
540 | page_alloc_chunk, list_entry) { | 546 | while (sgl) { |
541 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", | 547 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", |
542 | i++, c->base, c->length); | 548 | i++, |
549 | nvgpu_mem_sgl_phys(sgl), | ||
550 | nvgpu_mem_sgl_length(sgl)); | ||
551 | sgl = sgl->next; | ||
543 | } | 552 | } |
553 | palloc_dbg(a, "Alloc done\n"); | ||
544 | 554 | ||
545 | return alloc; | 555 | return alloc; |
546 | } | 556 | } |
@@ -638,11 +648,11 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( | |||
638 | struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused) | 648 | struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused) |
639 | { | 649 | { |
640 | struct nvgpu_page_alloc *alloc; | 650 | struct nvgpu_page_alloc *alloc; |
641 | struct page_alloc_chunk *c; | 651 | struct nvgpu_mem_sgl *sgl; |
642 | 652 | ||
643 | alloc = nvgpu_kmem_cache_alloc(a->alloc_cache); | 653 | alloc = nvgpu_kmem_cache_alloc(a->alloc_cache); |
644 | c = nvgpu_kmem_cache_alloc(a->chunk_cache); | 654 | sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); |
645 | if (!alloc || !c) | 655 | if (!alloc || !sgl) |
646 | goto fail; | 656 | goto fail; |
647 | 657 | ||
648 | alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); | 658 | alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); |
@@ -653,17 +663,18 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( | |||
653 | 663 | ||
654 | alloc->nr_chunks = 1; | 664 | alloc->nr_chunks = 1; |
655 | alloc->length = length; | 665 | alloc->length = length; |
656 | nvgpu_init_list_node(&alloc->alloc_chunks); | 666 | alloc->sgl = sgl; |
657 | 667 | ||
658 | c->base = alloc->base; | 668 | sgl->phys = alloc->base; |
659 | c->length = length; | 669 | sgl->dma = alloc->base; |
660 | nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks); | 670 | sgl->length = length; |
671 | sgl->next = NULL; | ||
661 | 672 | ||
662 | return alloc; | 673 | return alloc; |
663 | 674 | ||
664 | fail: | 675 | fail: |
665 | if (c) | 676 | if (sgl) |
666 | nvgpu_kmem_cache_free(a->chunk_cache, c); | 677 | nvgpu_kfree(a->owner->g, sgl); |
667 | if (alloc) | 678 | if (alloc) |
668 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); | 679 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); |
669 | return NULL; | 680 | return NULL; |
@@ -677,7 +688,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, | |||
677 | { | 688 | { |
678 | struct nvgpu_page_allocator *a = page_allocator(__a); | 689 | struct nvgpu_page_allocator *a = page_allocator(__a); |
679 | struct nvgpu_page_alloc *alloc = NULL; | 690 | struct nvgpu_page_alloc *alloc = NULL; |
680 | struct page_alloc_chunk *c; | 691 | struct nvgpu_mem_sgl *sgl; |
681 | u64 aligned_len, pages; | 692 | u64 aligned_len, pages; |
682 | int i = 0; | 693 | int i = 0; |
683 | 694 | ||
@@ -697,10 +708,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, | |||
697 | 708 | ||
698 | palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", | 709 | palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", |
699 | alloc->base, aligned_len, pages); | 710 | alloc->base, aligned_len, pages); |
700 | nvgpu_list_for_each_entry(c, &alloc->alloc_chunks, | 711 | sgl = alloc->sgl; |
701 | page_alloc_chunk, list_entry) { | 712 | while (sgl) { |
702 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", | 713 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", |
703 | i++, c->base, c->length); | 714 | i++, |
715 | nvgpu_mem_sgl_phys(sgl), | ||
716 | nvgpu_mem_sgl_length(sgl)); | ||
717 | sgl = sgl->next; | ||
704 | } | 718 | } |
705 | 719 | ||
706 | a->nr_fixed_allocs++; | 720 | a->nr_fixed_allocs++; |
@@ -896,11 +910,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, | |||
896 | 910 | ||
897 | a->alloc_cache = nvgpu_kmem_cache_create(g, | 911 | a->alloc_cache = nvgpu_kmem_cache_create(g, |
898 | sizeof(struct nvgpu_page_alloc)); | 912 | sizeof(struct nvgpu_page_alloc)); |
899 | a->chunk_cache = nvgpu_kmem_cache_create(g, | ||
900 | sizeof(struct page_alloc_chunk)); | ||
901 | a->slab_page_cache = nvgpu_kmem_cache_create(g, | 913 | a->slab_page_cache = nvgpu_kmem_cache_create(g, |
902 | sizeof(struct page_alloc_slab_page)); | 914 | sizeof(struct page_alloc_slab_page)); |
903 | if (!a->alloc_cache || !a->chunk_cache || !a->slab_page_cache) { | 915 | if (!a->alloc_cache || !a->slab_page_cache) { |
904 | err = -ENOMEM; | 916 | err = -ENOMEM; |
905 | goto fail; | 917 | goto fail; |
906 | } | 918 | } |
@@ -941,8 +953,6 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, | |||
941 | fail: | 953 | fail: |
942 | if (a->alloc_cache) | 954 | if (a->alloc_cache) |
943 | nvgpu_kmem_cache_destroy(a->alloc_cache); | 955 | nvgpu_kmem_cache_destroy(a->alloc_cache); |
944 | if (a->chunk_cache) | ||
945 | nvgpu_kmem_cache_destroy(a->chunk_cache); | ||
946 | if (a->slab_page_cache) | 956 | if (a->slab_page_cache) |
947 | nvgpu_kmem_cache_destroy(a->slab_page_cache); | 957 | nvgpu_kmem_cache_destroy(a->slab_page_cache); |
948 | nvgpu_kfree(g, a); | 958 | nvgpu_kfree(g, a); |