diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/nvgpu_mem.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 52 |
1 file changed, 50 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index 9f677058..b4e718b4 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | |||
@@ -47,13 +47,20 @@ u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl) | |||
47 | return sgt->ops->sgl_length(sgl); | 47 | return sgt->ops->sgl_length(sgl); |
48 | } | 48 | } |
49 | 49 | ||
50 | u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl, | 50 | u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl, |
51 | struct nvgpu_gmmu_attrs *attrs) | 51 | struct nvgpu_gmmu_attrs *attrs) |
52 | { | 52 | { |
53 | return sgt->ops->sgl_gpu_addr(g, sgl, attrs); | 53 | return sgt->ops->sgl_gpu_addr(g, sgl, attrs); |
54 | } | 54 | } |
55 | 55 | ||
56 | void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g) | 56 | bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt) |
57 | { | ||
58 | if (sgt->ops->sgt_iommuable) | ||
59 | return sgt->ops->sgt_iommuable(g, sgt); | ||
60 | return false; | ||
61 | } | ||
62 | |||
63 | void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt) | ||
57 | { | 64 | { |
58 | if (sgt && sgt->ops->sgt_free) | 65 | if (sgt && sgt->ops->sgt_free) |
59 | sgt->ops->sgt_free(g, sgt); | 66 | sgt->ops->sgt_free(g, sgt); |
@@ -69,3 +76,44 @@ u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys) | |||
69 | 76 | ||
70 | return phys; | 77 | return phys; |
71 | } | 78 | } |
79 | |||
80 | /* | ||
81 | * Determine alignment for a passed buffer. Necessary since the buffer may | ||
82 | * appear big enough to map with large pages but the SGL may have chunks that | ||
83 | * are not aligned on a 64/128kB large page boundary. There's also the | ||
84 | * possibility chunks are odd sizes which will necessitate small page mappings | ||
85 | * to correctly glue them together into a contiguous virtual mapping. | ||
86 | */ | ||
87 | u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt) | ||
88 | { | ||
89 | u64 align = 0, chunk_align = 0; | ||
90 | void *sgl; | ||
91 | |||
92 | /* | ||
93 | * If this SGT is iommuable and we want to use the IOMMU address then | ||
94 | * the SGT's first entry has the IOMMU address. We will align on this | ||
95 | * and double check length of buffer later. Also, since there's an | ||
96 | * IOMMU we know that this DMA address is contiguous. | ||
97 | */ | ||
98 | if (!g->mm.bypass_smmu && | ||
99 | nvgpu_sgt_iommuable(g, sgt) && | ||
100 | nvgpu_sgt_get_dma(sgt, sgt->sgl)) | ||
101 | return 1ULL << __ffs(nvgpu_sgt_get_dma(sgt, sgt->sgl)); | ||
102 | |||
103 | /* | ||
104 | * Otherwise the buffer is not iommuable (VIDMEM, for example) or we are | ||
105 | * bypassing the IOMMU and need to use the underlying physical entries | ||
106 | * of the SGT. | ||
107 | */ | ||
108 | nvgpu_sgt_for_each_sgl(sgl, sgt) { | ||
109 | chunk_align = 1ULL << __ffs(nvgpu_sgt_get_phys(sgt, sgl) | | ||
110 | nvgpu_sgt_get_length(sgt, sgl)); | ||
111 | |||
112 | if (align) | ||
113 | align = min(align, chunk_align); | ||
114 | else | ||
115 | align = chunk_align; | ||
116 | } | ||
117 | |||
118 | return align; | ||
119 | } | ||