diff options
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vm.c | 71 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 52 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 2 |
6 files changed, 86 insertions, 74 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index 1dbbd1a0..2587d56a 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | |||
@@ -529,6 +529,12 @@ static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl, | |||
529 | sg_dma_address((struct scatterlist *)sgl)); | 529 | sg_dma_address((struct scatterlist *)sgl)); |
530 | } | 530 | } |
531 | 531 | ||
532 | static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g, | ||
533 | struct nvgpu_sgt *sgt) | ||
534 | { | ||
535 | return true; | ||
536 | } | ||
537 | |||
532 | static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt) | 538 | static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt) |
533 | { | 539 | { |
534 | /* | 540 | /* |
@@ -539,12 +545,13 @@ static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt) | |||
539 | } | 545 | } |
540 | 546 | ||
541 | static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = { | 547 | static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = { |
542 | .sgl_next = nvgpu_mem_linux_sgl_next, | 548 | .sgl_next = nvgpu_mem_linux_sgl_next, |
543 | .sgl_phys = nvgpu_mem_linux_sgl_phys, | 549 | .sgl_phys = nvgpu_mem_linux_sgl_phys, |
544 | .sgl_dma = nvgpu_mem_linux_sgl_dma, | 550 | .sgl_dma = nvgpu_mem_linux_sgl_dma, |
545 | .sgl_length = nvgpu_mem_linux_sgl_length, | 551 | .sgl_length = nvgpu_mem_linux_sgl_length, |
546 | .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr, | 552 | .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr, |
547 | .sgt_free = nvgpu_mem_linux_sgl_free, | 553 | .sgt_iommuable = nvgpu_mem_linux_sgt_iommuable, |
554 | .sgt_free = nvgpu_mem_linux_sgl_free, | ||
548 | }; | 555 | }; |
549 | 556 | ||
550 | static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem( | 557 | static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem( |
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index 895a5771..006216c2 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c | |||
@@ -59,62 +59,6 @@ static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse( | |||
59 | return NULL; | 59 | return NULL; |
60 | } | 60 | } |
61 | 61 | ||
62 | /* | ||
63 | * Determine alignment for a passed buffer. Necessary since the buffer may | ||
64 | * appear big to map with large pages but the SGL may have chunks that are not | ||
65 | * aligned on a 64/128kB large page boundary. | ||
66 | */ | ||
67 | static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl, | ||
68 | enum nvgpu_aperture aperture) | ||
69 | { | ||
70 | u64 align = 0, chunk_align = 0; | ||
71 | u64 buf_addr; | ||
72 | |||
73 | if (aperture == APERTURE_VIDMEM) { | ||
74 | struct nvgpu_page_alloc *alloc = | ||
75 | nvgpu_vidmem_get_page_alloc(sgl); | ||
76 | struct nvgpu_sgt *sgt = &alloc->sgt; | ||
77 | void *sgl_vid = sgt->sgl; | ||
78 | |||
79 | while (sgl_vid) { | ||
80 | chunk_align = 1ULL << | ||
81 | __ffs(nvgpu_sgt_get_phys(sgt, sgl_vid)) | | ||
82 | nvgpu_sgt_get_length(sgt, sgl_vid); | ||
83 | |||
84 | if (align) | ||
85 | align = min(align, chunk_align); | ||
86 | else | ||
87 | align = chunk_align; | ||
88 | |||
89 | sgl_vid = nvgpu_sgt_get_next(sgt, sgl_vid); | ||
90 | } | ||
91 | |||
92 | return align; | ||
93 | } | ||
94 | |||
95 | buf_addr = (u64)sg_dma_address(sgl); | ||
96 | |||
97 | if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) { | ||
98 | while (sgl) { | ||
99 | buf_addr = (u64)sg_phys(sgl); | ||
100 | chunk_align = 1ULL << __ffs(buf_addr | | ||
101 | (u64)sgl->length); | ||
102 | |||
103 | if (align) | ||
104 | align = min(align, chunk_align); | ||
105 | else | ||
106 | align = chunk_align; | ||
107 | sgl = sg_next(sgl); | ||
108 | } | ||
109 | |||
110 | return align; | ||
111 | } | ||
112 | |||
113 | align = 1ULL << __ffs(buf_addr); | ||
114 | |||
115 | return align; | ||
116 | } | ||
117 | |||
118 | int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, | 62 | int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va, |
119 | struct dma_buf **dmabuf, | 63 | struct dma_buf **dmabuf, |
120 | u64 *offset) | 64 | u64 *offset) |
@@ -218,7 +162,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm, | |||
218 | struct nvgpu_ctag_buffer_info binfo = { 0 }; | 162 | struct nvgpu_ctag_buffer_info binfo = { 0 }; |
219 | struct gk20a_comptags comptags; | 163 | struct gk20a_comptags comptags; |
220 | struct nvgpu_vm_area *vm_area = NULL; | 164 | struct nvgpu_vm_area *vm_area = NULL; |
221 | struct nvgpu_sgt *nvgpu_sgt; | 165 | struct nvgpu_sgt *nvgpu_sgt = NULL; |
222 | struct sg_table *sgt; | 166 | struct sg_table *sgt; |
223 | struct nvgpu_mapped_buf *mapped_buffer = NULL; | 167 | struct nvgpu_mapped_buf *mapped_buffer = NULL; |
224 | enum nvgpu_aperture aperture; | 168 | enum nvgpu_aperture aperture; |
@@ -279,6 +223,10 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm, | |||
279 | goto clean_up; | 223 | goto clean_up; |
280 | } | 224 | } |
281 | 225 | ||
226 | nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt); | ||
227 | if (!nvgpu_sgt) | ||
228 | goto clean_up; | ||
229 | |||
282 | aperture = gk20a_dmabuf_aperture(g, dmabuf); | 230 | aperture = gk20a_dmabuf_aperture(g, dmabuf); |
283 | if (aperture == APERTURE_INVALID) { | 231 | if (aperture == APERTURE_INVALID) { |
284 | err = -EINVAL; | 232 | err = -EINVAL; |
@@ -288,7 +236,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm, | |||
288 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) | 236 | if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) |
289 | map_offset = offset_align; | 237 | map_offset = offset_align; |
290 | 238 | ||
291 | align = nvgpu_get_buffer_alignment(g, sgt->sgl, aperture); | 239 | align = nvgpu_sgt_alignment(g, nvgpu_sgt); |
292 | if (g->mm.disable_bigpage) | 240 | if (g->mm.disable_bigpage) |
293 | binfo.pgsz_idx = gmmu_page_size_small; | 241 | binfo.pgsz_idx = gmmu_page_size_small; |
294 | else | 242 | else |
@@ -370,8 +318,6 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm, | |||
370 | ctag_offset += buffer_offset >> | 318 | ctag_offset += buffer_offset >> |
371 | ilog2(g->ops.fb.compression_page_size(g)); | 319 | ilog2(g->ops.fb.compression_page_size(g)); |
372 | 320 | ||
373 | nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt); | ||
374 | |||
375 | /* update gmmu ptes */ | 321 | /* update gmmu ptes */ |
376 | map_offset = g->ops.mm.gmmu_map(vm, | 322 | map_offset = g->ops.mm.gmmu_map(vm, |
377 | map_offset, | 323 | map_offset, |
@@ -391,7 +337,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm, | |||
391 | if (!map_offset) | 337 | if (!map_offset) |
392 | goto clean_up; | 338 | goto clean_up; |
393 | 339 | ||
394 | nvgpu_sgt_free(nvgpu_sgt, g); | 340 | nvgpu_sgt_free(g, nvgpu_sgt); |
395 | 341 | ||
396 | mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); | 342 | mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); |
397 | if (!mapped_buffer) { | 343 | if (!mapped_buffer) { |
@@ -434,6 +380,9 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm, | |||
434 | 380 | ||
435 | clean_up: | 381 | clean_up: |
436 | nvgpu_kfree(g, mapped_buffer); | 382 | nvgpu_kfree(g, mapped_buffer); |
383 | |||
384 | if (nvgpu_sgt) | ||
385 | nvgpu_sgt_free(g, nvgpu_sgt); | ||
437 | if (va_allocated) | 386 | if (va_allocated) |
438 | __nvgpu_vm_free_va(vm, map_offset, binfo.pgsz_idx); | 387 | __nvgpu_vm_free_va(vm, map_offset, binfo.pgsz_idx); |
439 | if (!IS_ERR(sgt)) | 388 | if (!IS_ERR(sgt)) |
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 875bcc4e..4289104d 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -93,7 +93,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
93 | aperture); | 93 | aperture); |
94 | nvgpu_mutex_release(&vm->update_gmmu_lock); | 94 | nvgpu_mutex_release(&vm->update_gmmu_lock); |
95 | 95 | ||
96 | nvgpu_sgt_free(sgt, g); | 96 | nvgpu_sgt_free(g, sgt); |
97 | 97 | ||
98 | if (!vaddr) { | 98 | if (!vaddr) { |
99 | nvgpu_err(g, "failed to map buffer!"); | 99 | nvgpu_err(g, "failed to map buffer!"); |
@@ -500,7 +500,7 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm, | |||
500 | * IO address and will be contiguous. | 500 | * IO address and will be contiguous. |
501 | */ | 501 | */ |
502 | if (attrs->aperture == APERTURE_SYSMEM && !g->mm.bypass_smmu) { | 502 | if (attrs->aperture == APERTURE_SYSMEM && !g->mm.bypass_smmu) { |
503 | u64 io_addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgt->sgl, attrs); | 503 | u64 io_addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, attrs); |
504 | 504 | ||
505 | io_addr += space_to_skip; | 505 | io_addr += space_to_skip; |
506 | 506 | ||
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index 9f677058..b4e718b4 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | |||
@@ -47,13 +47,20 @@ u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl) | |||
47 | return sgt->ops->sgl_length(sgl); | 47 | return sgt->ops->sgl_length(sgl); |
48 | } | 48 | } |
49 | 49 | ||
50 | u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl, | 50 | u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl, |
51 | struct nvgpu_gmmu_attrs *attrs) | 51 | struct nvgpu_gmmu_attrs *attrs) |
52 | { | 52 | { |
53 | return sgt->ops->sgl_gpu_addr(g, sgl, attrs); | 53 | return sgt->ops->sgl_gpu_addr(g, sgl, attrs); |
54 | } | 54 | } |
55 | 55 | ||
56 | void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g) | 56 | bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt) |
57 | { | ||
58 | if (sgt->ops->sgt_iommuable) | ||
59 | return sgt->ops->sgt_iommuable(g, sgt); | ||
60 | return false; | ||
61 | } | ||
62 | |||
63 | void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt) | ||
57 | { | 64 | { |
58 | if (sgt && sgt->ops->sgt_free) | 65 | if (sgt && sgt->ops->sgt_free) |
59 | sgt->ops->sgt_free(g, sgt); | 66 | sgt->ops->sgt_free(g, sgt); |
@@ -69,3 +76,44 @@ u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys) | |||
69 | 76 | ||
70 | return phys; | 77 | return phys; |
71 | } | 78 | } |
79 | |||
80 | /* | ||
81 | * Determine alignment for a passed buffer. Necessary since the buffer may | ||
82 | * appear big enough to map with large pages but the SGL may have chunks that | ||
83 | * are not aligned on a 64/128kB large page boundary. There's also the | ||
84 | * possibility chunks are odd sizes which will necessitate small page mappings | ||
85 | * to correctly glue them together into a contiguous virtual mapping. | ||
86 | */ | ||
87 | u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt) | ||
88 | { | ||
89 | u64 align = 0, chunk_align = 0; | ||
90 | void *sgl; | ||
91 | |||
92 | /* | ||
93 | * If this SGT is iommuable and we want to use the IOMMU address then | ||
94 | * the SGT's first entry has the IOMMU address. We will align on this | ||
95 | * and double check length of buffer later. Also, since there's an | ||
96 | * IOMMU we know that this DMA address is contiguous. | ||
97 | */ | ||
98 | if (!g->mm.bypass_smmu && | ||
99 | nvgpu_sgt_iommuable(g, sgt) && | ||
100 | nvgpu_sgt_get_dma(sgt, sgt->sgl)) | ||
101 | return 1ULL << __ffs(nvgpu_sgt_get_dma(sgt, sgt->sgl)); | ||
102 | |||
103 | /* | ||
104 | * Otherwise the buffer is not iommuable (VIDMEM, for example) or we are | ||
105 | * bypassing the IOMMU and need to use the underlying physical entries | ||
106 | * of the SGT. | ||
107 | */ | ||
108 | nvgpu_sgt_for_each_sgl(sgl, sgt) { | ||
109 | chunk_align = 1ULL << __ffs(nvgpu_sgt_get_phys(sgt, sgl) | | ||
110 | nvgpu_sgt_get_length(sgt, sgl)); | ||
111 | |||
112 | if (align) | ||
113 | align = min(align, chunk_align); | ||
114 | else | ||
115 | align = chunk_align; | ||
116 | } | ||
117 | |||
118 | return align; | ||
119 | } | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 6feacff7..2b8b7015 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | |||
@@ -62,6 +62,11 @@ struct nvgpu_sgt_ops { | |||
62 | u64 (*sgl_gpu_addr)(struct gk20a *g, void *sgl, | 62 | u64 (*sgl_gpu_addr)(struct gk20a *g, void *sgl, |
63 | struct nvgpu_gmmu_attrs *attrs); | 63 | struct nvgpu_gmmu_attrs *attrs); |
64 | /* | 64 | /* |
65 | * If left NULL then iommuable is assumed to be false. | ||
66 | */ | ||
67 | bool (*sgt_iommuable)(struct gk20a *g, struct nvgpu_sgt *sgt); | ||
68 | |||
69 | /* | ||
65 | * Note: this operates on the whole SGT not a specific SGL entry. | 70 | * Note: this operates on the whole SGT not a specific SGL entry. |
66 | */ | 71 | */ |
67 | void (*sgt_free)(struct gk20a *g, struct nvgpu_sgt *sgt); | 72 | void (*sgt_free)(struct gk20a *g, struct nvgpu_sgt *sgt); |
@@ -232,9 +237,12 @@ void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl); | |||
232 | u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl); | 237 | u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl); |
233 | u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl); | 238 | u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl); |
234 | u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl); | 239 | u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl); |
235 | u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl, | 240 | u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl, |
236 | struct nvgpu_gmmu_attrs *attrs); | 241 | struct nvgpu_gmmu_attrs *attrs); |
237 | void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g); | 242 | void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt); |
243 | |||
244 | bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt); | ||
245 | u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt); | ||
238 | 246 | ||
239 | /** | 247 | /** |
240 | * nvgpu_mem_create_from_mem - Create a new nvgpu_mem struct from an old one. | 248 | * nvgpu_mem_create_from_mem - Create a new nvgpu_mem struct from an old one. |
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 498a1528..a125366a 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
@@ -107,7 +107,7 @@ u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | |||
107 | struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); | 107 | struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); |
108 | struct tegra_vgpu_cmd_msg msg; | 108 | struct tegra_vgpu_cmd_msg msg; |
109 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; | 109 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; |
110 | u64 addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgt->sgl, NULL); | 110 | u64 addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, NULL); |
111 | u8 prot; | 111 | u8 prot; |
112 | 112 | ||
113 | gk20a_dbg_fn(""); | 113 | gk20a_dbg_fn(""); |