author    Alex Waterman <alexw@nvidia.com>                        2017-10-18 16:24:53 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>    2017-11-10 18:46:54 -0500
commit    ee4970a33f41b56f2ada6a0b5ab6f9c400e39d88
tree      65d26ac7fd8667ac10cee8330a7647e9e72a745c /drivers/gpu/nvgpu/common/linux/vm.c
parent    6911b4d48c414279731580f1212e29e4b691b04c
gpu: nvgpu: Make buf alignment generic
Drastically simplify the alignment computation for buffers being
mapped, and move it into the SGT code: an SGT is all that is needed
to compute the alignment.
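The generic helper itself lands outside this file. For illustration
only, a minimal sketch of the shape it could take, mirroring the
removed Linux-specific logic; nvgpu_sgt_alignment() is the name seen
at the call site below, while nvgpu_sgt_get_dma() is an assumed SGT
getter not shown in this diff:

    u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt)
    {
            u64 align = 0, chunk_align;
            void *sgl;

            /*
             * Behind an IOMMU the DMA address is one contiguous
             * chunk, so its alignment is the buffer's alignment.
             */
            if (!g->mm.bypass_smmu && nvgpu_sgt_iommuable(g, sgt))
                    return 1ULL << __ffs(nvgpu_sgt_get_dma(sgt, sgt->sgl));

            /* Otherwise the worst-aligned chunk limits the mapping. */
            for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl)) {
                    chunk_align = 1ULL <<
                            __ffs(nvgpu_sgt_get_phys(sgt, sgl) |
                                  nvgpu_sgt_get_length(sgt, sgl));

                    align = align ? min(align, chunk_align) : chunk_align;
            }

            return align;
    }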
This did, however, require adding a new SGT op:

    nvgpu_sgt_iommuable()

This op returns true if the passed SGT is IOMMU'able. It must be
implemented by any SGT implementation whose buffers can be
IOMMU'able; if the op is left NULL, the buffer is assumed not to be
IOMMU'able.
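For illustration, a sketch of the assumed NULL-op fallback (the ops
struct and field name are illustrative; the real wrapper is not part
of this diff):

    bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt)
    {
            /* A missing op means the buffer is not IOMMU'able. */
            if (!sgt->ops->sgt_iommuable)
                    return false;

            return sgt->ops->sgt_iommuable(g, sgt);
    }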
Also clean up the parameter ordering convention across all nvgpu_sgt
functions. Previously there was a mishmash of different parameter
orderings. This patch standardizes on the gk20a-first approach used
everywhere else in the driver.
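The nvgpu_sgt_free() call in this file shows the convention change:

    /* Before: nvgpu_sgt first, gk20a pointer last. */
    nvgpu_sgt_free(nvgpu_sgt, g);

    /* After: gk20a pointer first, like the rest of the driver. */
    nvgpu_sgt_free(g, nvgpu_sgt);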
JIRA NVGPU-30
JIRA NVGPU-246
JIRA NVGPU-71
Change-Id: Ic4ab7b752847cf795c7cfafed5a07818217bba86
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1583985
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/vm.c')
-rw-r--r--    drivers/gpu/nvgpu/common/linux/vm.c    71
1 file changed, 10 insertions, 61 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 895a5771..006216c2 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -59,62 +59,6 @@ static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
 	return NULL;
 }
 
-/*
- * Determine alignment for a passed buffer. Necessary since the buffer may
- * appear big to map with large pages but the SGL may have chunks that are not
- * aligned on a 64/128kB large page boundary.
- */
-static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
-				      enum nvgpu_aperture aperture)
-{
-	u64 align = 0, chunk_align = 0;
-	u64 buf_addr;
-
-	if (aperture == APERTURE_VIDMEM) {
-		struct nvgpu_page_alloc *alloc =
-			nvgpu_vidmem_get_page_alloc(sgl);
-		struct nvgpu_sgt *sgt = &alloc->sgt;
-		void *sgl_vid = sgt->sgl;
-
-		while (sgl_vid) {
-			chunk_align = 1ULL <<
-				__ffs(nvgpu_sgt_get_phys(sgt, sgl_vid) |
-				      nvgpu_sgt_get_length(sgt, sgl_vid));
-
-			if (align)
-				align = min(align, chunk_align);
-			else
-				align = chunk_align;
-
-			sgl_vid = nvgpu_sgt_get_next(sgt, sgl_vid);
-		}
-
-		return align;
-	}
-
-	buf_addr = (u64)sg_dma_address(sgl);
-
-	if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
-		while (sgl) {
-			buf_addr = (u64)sg_phys(sgl);
-			chunk_align = 1ULL << __ffs(buf_addr |
-						    (u64)sgl->length);
-
-			if (align)
-				align = min(align, chunk_align);
-			else
-				align = chunk_align;
-			sgl = sg_next(sgl);
-		}
-
-		return align;
-	}
-
-	align = 1ULL << __ffs(buf_addr);
-
-	return align;
-}
-
 int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
 		      struct dma_buf **dmabuf,
 		      u64 *offset)
@@ -218,7 +162,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	struct nvgpu_ctag_buffer_info binfo = { 0 };
 	struct gk20a_comptags comptags;
 	struct nvgpu_vm_area *vm_area = NULL;
-	struct nvgpu_sgt *nvgpu_sgt;
+	struct nvgpu_sgt *nvgpu_sgt = NULL;
 	struct sg_table *sgt;
 	struct nvgpu_mapped_buf *mapped_buffer = NULL;
 	enum nvgpu_aperture aperture;
@@ -279,6 +223,10 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 		goto clean_up;
 	}
 
+	nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
+	if (!nvgpu_sgt)
+		goto clean_up;
+
 	aperture = gk20a_dmabuf_aperture(g, dmabuf);
 	if (aperture == APERTURE_INVALID) {
 		err = -EINVAL;
@@ -288,7 +236,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
 		map_offset = offset_align;
 
-	align = nvgpu_get_buffer_alignment(g, sgt->sgl, aperture);
+	align = nvgpu_sgt_alignment(g, nvgpu_sgt);
 	if (g->mm.disable_bigpage)
 		binfo.pgsz_idx = gmmu_page_size_small;
 	else
@@ -370,8 +318,6 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 		ctag_offset += buffer_offset >>
 			       ilog2(g->ops.fb.compression_page_size(g));
 
-	nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
-
 	/* update gmmu ptes */
 	map_offset = g->ops.mm.gmmu_map(vm,
 					map_offset,
@@ -391,7 +337,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	if (!map_offset)
 		goto clean_up;
 
-	nvgpu_sgt_free(nvgpu_sgt, g);
+	nvgpu_sgt_free(g, nvgpu_sgt);
 
 	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
 	if (!mapped_buffer) {
@@ -434,6 +380,9 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 
 clean_up:
 	nvgpu_kfree(g, mapped_buffer);
+
+	if (nvgpu_sgt)
+		nvgpu_sgt_free(g, nvgpu_sgt);
 	if (va_allocated)
 		__nvgpu_vm_free_va(vm, map_offset, binfo.pgsz_idx);
 	if (!IS_ERR(sgt))