diff options
author | Alex Waterman <alexw@nvidia.com> | 2018-02-27 12:34:21 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-04-23 20:14:09 -0400 |
commit | 855d8f2379f3a157977c17ba125a422b9f74c365 (patch) | |
tree | d9f0278d61b9abd82e4b8d5f618ce4bf94ba9370 /drivers | |
parent | e9a6d179a42e7bdd6bb4876fb14f4ff7ab0df852 (diff) |
gpu: nvgpu: Check for all sysmem apertures in GMMU
Allow a potential IOMMU'ed GMMU mapping for all SYSMEM buffers
including coherent sysmem. Typically this won't actually happen
since IO coherent mappings will also often be accessed over
NVLINK which is physically addressed.
Also update the comments surrounding this code to take into
account the new NVLINK nuances. Since NVLINK buffers are
directly mapped even when the IOMMU is enabled this is very
deserving of a comment explaining what's going on.
Lastly add some simple functions for checking if an nvgpu_mem
(or a particular aperture field) is a sysmem aperture. Currently
this includes SYSMEM and SYSMEM_COH.
JIRA EVLR-2333
Change-Id: I992d3c25d433778eaad9eef338aa5aa42afe597e
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1665185
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 22 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 9 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | 3 |
3 files changed, 27 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 89d71b3b..0b57b88d 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -515,12 +515,20 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm, | |||
515 | /* | 515 | /* |
516 | * At this point we have a scatter-gather list pointing to some number | 516 | * At this point we have a scatter-gather list pointing to some number |
517 | * of discontiguous chunks of memory. We must iterate over that list and | 517 | * of discontiguous chunks of memory. We must iterate over that list and |
518 | * generate a GMMU map call for each chunk. There are two possibilities: | 518 | * generate a GMMU map call for each chunk. There are several |
519 | * either an IOMMU is enabled or not. When an IOMMU is enabled the | 519 | * possibilities: |
520 | * mapping is simple since the "physical" address is actually a virtual | 520 | * |
521 | * IO address and will be contiguous. | 521 | * 1. IOMMU enabled, IOMMU addressing (typical iGPU) |
522 | * 2. IOMMU enabled, IOMMU bypass (NVLINK bypasses SMMU) | ||
523 | * 3. IOMMU disabled (less common but still supported) | ||
524 | * 4. VIDMEM | ||
525 | * | ||
526 | * For (1) we can assume that there's really only one actual SG chunk | ||
527 | * since the IOMMU gives us a single contiguous address range. However, | ||
528 | * for (2), (3) and (4) we have to actually go through each SG entry and | ||
529 | * map each chunk individually. | ||
522 | */ | 530 | */ |
523 | if (attrs->aperture == APERTURE_SYSMEM && | 531 | if (nvgpu_aperture_is_sysmem(attrs->aperture) && |
524 | nvgpu_iommuable(g) && | 532 | nvgpu_iommuable(g) && |
525 | nvgpu_sgt_iommuable(g, sgt)) { | 533 | nvgpu_sgt_iommuable(g, sgt)) { |
526 | u64 io_addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, attrs); | 534 | u64 io_addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, attrs); |
@@ -538,8 +546,8 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm, | |||
538 | } | 546 | } |
539 | 547 | ||
540 | /* | 548 | /* |
541 | * Finally: last possible case: do the no-IOMMU mapping. In this case we | 549 | * Handle cases (2), (3), and (4): do the no-IOMMU mapping. In this case |
542 | * really are mapping physical pages directly. | 550 | * we really are mapping physical pages directly. |
543 | */ | 551 | */ |
544 | nvgpu_sgt_for_each_sgl(sgl, sgt) { | 552 | nvgpu_sgt_for_each_sgl(sgl, sgt) { |
545 | u64 phys_addr; | 553 | u64 phys_addr; |
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index 4def4938..855d455d 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | |||
@@ -74,6 +74,15 @@ u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, | |||
74 | sysmem_mask, sysmem_coh_mask, vidmem_mask); | 74 | sysmem_mask, sysmem_coh_mask, vidmem_mask); |
75 | } | 75 | } |
76 | 76 | ||
77 | bool nvgpu_aperture_is_sysmem(enum nvgpu_aperture ap) | ||
78 | { | ||
79 | return ap == __APERTURE_SYSMEM_COH || ap == APERTURE_SYSMEM; | ||
80 | } | ||
81 | |||
82 | bool nvgpu_mem_is_sysmem(struct nvgpu_mem *mem) | ||
83 | { | ||
84 | return nvgpu_aperture_is_sysmem(mem->aperture); | ||
85 | } | ||
77 | 86 | ||
78 | struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, | 87 | struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, |
79 | struct nvgpu_sgl *sgl) | 88 | struct nvgpu_sgl *sgl) |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 302f7287..c5e3e752 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | |||
@@ -217,6 +217,9 @@ static inline const char *nvgpu_aperture_str(struct gk20a *g, | |||
217 | return "UNKNOWN"; | 217 | return "UNKNOWN"; |
218 | } | 218 | } |
219 | 219 | ||
220 | bool nvgpu_aperture_is_sysmem(enum nvgpu_aperture ap); | ||
221 | bool nvgpu_mem_is_sysmem(struct nvgpu_mem *mem); | ||
222 | |||
220 | /* | 223 | /* |
221 | * Returns true if the passed nvgpu_mem has been allocated (i.e it's valid for | 224 | * Returns true if the passed nvgpu_mem has been allocated (i.e it's valid for |
222 | * subsequent use). | 225 | * subsequent use). |