-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c  | 19
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c         | 71
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c          |  4
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_mem.c     | 52
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | 12
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c            |  2
6 files changed, 86 insertions(+), 74 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index 1dbbd1a0..2587d56a 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -529,6 +529,12 @@ static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl,
 			sg_dma_address((struct scatterlist *)sgl));
 }
 
+static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
+					  struct nvgpu_sgt *sgt)
+{
+	return true;
+}
+
 static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
 {
 	/*
@@ -539,12 +545,13 @@ static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
 }
 
 static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
-	.sgl_next = nvgpu_mem_linux_sgl_next,
-	.sgl_phys = nvgpu_mem_linux_sgl_phys,
-	.sgl_dma = nvgpu_mem_linux_sgl_dma,
-	.sgl_length = nvgpu_mem_linux_sgl_length,
-	.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
-	.sgt_free = nvgpu_mem_linux_sgl_free,
+	.sgl_next      = nvgpu_mem_linux_sgl_next,
+	.sgl_phys      = nvgpu_mem_linux_sgl_phys,
+	.sgl_dma       = nvgpu_mem_linux_sgl_dma,
+	.sgl_length    = nvgpu_mem_linux_sgl_length,
+	.sgl_gpu_addr  = nvgpu_mem_linux_sgl_gpu_addr,
+	.sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
+	.sgt_free      = nvgpu_mem_linux_sgl_free,
 };
 
 static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
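
The Linux backend can answer `true` unconditionally because its scatter-gather tables always sit behind the SMMU; a backend that cannot make that promise simply leaves the op NULL, and the common wrapper (added later in this commit) then reports false. Below is a self-contained sketch of that optional-op dispatch pattern — the demo_* names are hypothetical stand-ins, not nvgpu's real types:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct demo_sgt;

struct demo_sgt_ops {
	/* Optional: if left NULL, "iommuable" is assumed to be false. */
	bool (*sgt_iommuable)(struct demo_sgt *sgt);
};

struct demo_sgt {
	const struct demo_sgt_ops *ops;
};

/* Common-code wrapper: guards the optional op before calling through it. */
static bool demo_sgt_iommuable(struct demo_sgt *sgt)
{
	if (sgt->ops->sgt_iommuable)
		return sgt->ops->sgt_iommuable(sgt);
	return false;
}

/* A sysmem-style backend behind an IOMMU can answer true unconditionally. */
static bool sysmem_iommuable(struct demo_sgt *sgt)
{
	(void)sgt;
	return true;
}

static const struct demo_sgt_ops sysmem_ops = { .sgt_iommuable = sysmem_iommuable };
static const struct demo_sgt_ops vidmem_ops = { .sgt_iommuable = NULL };

int main(void)
{
	struct demo_sgt sys = { &sysmem_ops }, vid = { &vidmem_ops };

	printf("sysmem iommuable: %d\n", demo_sgt_iommuable(&sys)); /* 1 */
	printf("vidmem iommuable: %d\n", demo_sgt_iommuable(&vid)); /* 0 */
	return 0;
}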
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 895a5771..006216c2 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -59,62 +59,6 @@ static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
 	return NULL;
 }
 
-/*
- * Determine alignment for a passed buffer. Necessary since the buffer may
- * appear big to map with large pages but the SGL may have chunks that are not
- * aligned on a 64/128kB large page boundary.
- */
-static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
-				      enum nvgpu_aperture aperture)
-{
-	u64 align = 0, chunk_align = 0;
-	u64 buf_addr;
-
-	if (aperture == APERTURE_VIDMEM) {
-		struct nvgpu_page_alloc *alloc =
-			nvgpu_vidmem_get_page_alloc(sgl);
-		struct nvgpu_sgt *sgt = &alloc->sgt;
-		void *sgl_vid = sgt->sgl;
-
-		while (sgl_vid) {
-			chunk_align = 1ULL <<
-				__ffs(nvgpu_sgt_get_phys(sgt, sgl_vid) |
-				      nvgpu_sgt_get_length(sgt, sgl_vid));
-
-			if (align)
-				align = min(align, chunk_align);
-			else
-				align = chunk_align;
-
-			sgl_vid = nvgpu_sgt_get_next(sgt, sgl_vid);
-		}
-
-		return align;
-	}
-
-	buf_addr = (u64)sg_dma_address(sgl);
-
-	if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
-		while (sgl) {
-			buf_addr = (u64)sg_phys(sgl);
-			chunk_align = 1ULL << __ffs(buf_addr |
-						    (u64)sgl->length);
-
-			if (align)
-				align = min(align, chunk_align);
-			else
-				align = chunk_align;
-			sgl = sg_next(sgl);
-		}
-
-		return align;
-	}
-
-	align = 1ULL << __ffs(buf_addr);
-
-	return align;
-}
-
 int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
 		      struct dma_buf **dmabuf,
 		      u64 *offset)
@@ -218,7 +162,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	struct nvgpu_ctag_buffer_info binfo = { 0 };
 	struct gk20a_comptags comptags;
 	struct nvgpu_vm_area *vm_area = NULL;
-	struct nvgpu_sgt *nvgpu_sgt;
+	struct nvgpu_sgt *nvgpu_sgt = NULL;
 	struct sg_table *sgt;
 	struct nvgpu_mapped_buf *mapped_buffer = NULL;
 	enum nvgpu_aperture aperture;
@@ -279,6 +223,10 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 		goto clean_up;
 	}
 
+	nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
+	if (!nvgpu_sgt)
+		goto clean_up;
+
 	aperture = gk20a_dmabuf_aperture(g, dmabuf);
 	if (aperture == APERTURE_INVALID) {
 		err = -EINVAL;
@@ -288,7 +236,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
 		map_offset = offset_align;
 
-	align = nvgpu_get_buffer_alignment(g, sgt->sgl, aperture);
+	align = nvgpu_sgt_alignment(g, nvgpu_sgt);
 	if (g->mm.disable_bigpage)
 		binfo.pgsz_idx = gmmu_page_size_small;
 	else
@@ -370,8 +318,6 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 		ctag_offset += buffer_offset >>
 			       ilog2(g->ops.fb.compression_page_size(g));
 
-	nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
-
 	/* update gmmu ptes */
 	map_offset = g->ops.mm.gmmu_map(vm,
 					map_offset,
@@ -391,7 +337,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	if (!map_offset)
 		goto clean_up;
 
-	nvgpu_sgt_free(nvgpu_sgt, g);
+	nvgpu_sgt_free(g, nvgpu_sgt);
 
 	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
 	if (!mapped_buffer) {
@@ -434,6 +380,9 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 
 clean_up:
 	nvgpu_kfree(g, mapped_buffer);
+
+	if (nvgpu_sgt)
+		nvgpu_sgt_free(g, nvgpu_sgt);
 	if (va_allocated)
 		__nvgpu_vm_free_va(vm, map_offset, binfo.pgsz_idx);
 	if (!IS_ERR(sgt))
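
With this change the nvgpu_sgt is created up front (so nvgpu_sgt_alignment() can use it) and must be released on every exit path, which is why the pointer is now NULL-initialized and the clean_up label frees it under a NULL check. A compilable sketch of the same create-early/free-everywhere pattern, with hypothetical make_thing()/free_thing() helpers standing in for nvgpu_linux_sgt_create()/nvgpu_sgt_free():

#include <stdbool.h>
#include <stdlib.h>

struct thing { int dummy; };

static struct thing *make_thing(void) { return calloc(1, sizeof(struct thing)); }
static void free_thing(struct thing *t) { free(t); }

static int map_buffer(bool fail_late)
{
	struct thing *t = NULL;   /* NULL-init: clean_up may run before creation */
	int err = 0;

	t = make_thing();         /* created early so setup code can inspect it */
	if (!t) {
		err = -1;
		goto clean_up;
	}

	if (fail_late) {
		err = -1;
		goto clean_up;    /* t is live here; clean_up releases it */
	}

	free_thing(t);            /* success path: freed once fully consumed */
	t = NULL;                 /* prevents a double free if later code bails */

	return 0;

clean_up:
	if (t)                    /* guard: creation itself may have failed */
		free_thing(t);
	return err;
}

int main(void)
{
	return map_buffer(true) == -1 ? 0 : 1;  /* exercise the error path */
}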
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 875bcc4e..4289104d 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -93,7 +93,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 					    aperture);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 
-	nvgpu_sgt_free(sgt, g);
+	nvgpu_sgt_free(g, sgt);
 
 	if (!vaddr) {
 		nvgpu_err(g, "failed to map buffer!");
@@ -500,7 +500,7 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
 	 * IO address and will be contiguous.
 	 */
 	if (attrs->aperture == APERTURE_SYSMEM && !g->mm.bypass_smmu) {
-		u64 io_addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgt->sgl, attrs);
+		u64 io_addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, attrs);
 
 		io_addr += space_to_skip;
 
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 9f677058..b4e718b4 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -47,13 +47,20 @@ u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl)
 	return sgt->ops->sgl_length(sgl);
 }
 
-u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl,
+u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl,
 			   struct nvgpu_gmmu_attrs *attrs)
 {
 	return sgt->ops->sgl_gpu_addr(g, sgl, attrs);
 }
 
-void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g)
+bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt)
+{
+	if (sgt->ops->sgt_iommuable)
+		return sgt->ops->sgt_iommuable(g, sgt);
+	return false;
+}
+
+void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
 {
 	if (sgt && sgt->ops->sgt_free)
 		sgt->ops->sgt_free(g, sgt);
@@ -69,3 +76,44 @@ u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys)
 
 	return phys;
 }
+
+/*
+ * Determine alignment for a passed buffer. Necessary since the buffer may
+ * appear big enough to map with large pages but the SGL may have chunks that
+ * are not aligned on a 64/128kB large page boundary. There's also the
+ * possibility chunks are odd sizes which will necessitate small page mappings
+ * to correctly glue them together into a contiguous virtual mapping.
+ */
+u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt)
+{
+	u64 align = 0, chunk_align = 0;
+	void *sgl;
+
+	/*
+	 * If this SGT is iommuable and we want to use the IOMMU address then
+	 * the SGT's first entry has the IOMMU address. We will align on this
+	 * and double check length of buffer later. Also, since there's an
+	 * IOMMU we know that this DMA address is contiguous.
+	 */
+	if (!g->mm.bypass_smmu &&
+	    nvgpu_sgt_iommuable(g, sgt) &&
+	    nvgpu_sgt_get_dma(sgt, sgt->sgl))
+		return 1ULL << __ffs(nvgpu_sgt_get_dma(sgt, sgt->sgl));
+
+	/*
+	 * Otherwise the buffer is not iommuable (VIDMEM, for example) or we are
+	 * bypassing the IOMMU and need to use the underlying physical entries
+	 * of the SGT.
+	 */
+	nvgpu_sgt_for_each_sgl(sgl, sgt) {
+		chunk_align = 1ULL << __ffs(nvgpu_sgt_get_phys(sgt, sgl) |
+					    nvgpu_sgt_get_length(sgt, sgl));
+
+		if (align)
+			align = min(align, chunk_align);
+		else
+			align = chunk_align;
+	}
+
+	return align;
+}
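
The per-chunk math above says a chunk can be mapped no more coarsely than the lowest set bit of (physical address | length), and the whole buffer inherits the minimum across chunks. A userspace rendition of the same computation, using __builtin_ctzll where the kernel uses __ffs, with two hypothetical chunks worked through in the comments:

#include <stdint.h>
#include <stdio.h>

struct chunk {
	uint64_t phys;
	uint64_t length;
};

/* Kernel __ffs(x) is the index of the lowest set bit; ctzll matches for x != 0. */
static uint64_t chunk_alignment(uint64_t phys, uint64_t length)
{
	return 1ULL << __builtin_ctzll(phys | length);
}

static uint64_t buffer_alignment(const struct chunk *chunks, int n)
{
	uint64_t align = 0;
	int i;

	for (i = 0; i < n; i++) {
		uint64_t chunk_align = chunk_alignment(chunks[i].phys,
						       chunks[i].length);

		/* Minimum across chunks, same as the loop in nvgpu_sgt_alignment(). */
		align = align ? (align < chunk_align ? align : chunk_align)
			      : chunk_align;
	}
	return align;
}

int main(void)
{
	/* Hypothetical SGL: one well-aligned chunk, one 4K-aligned chunk. */
	struct chunk chunks[] = {
		{ 0x40000, 0x20000 },  /* 1 << ctz(0x60000) = 0x20000 (128K) */
		{ 0x61000, 0x1000  },  /* 1 << ctz(0x61000) = 0x1000  (4K)   */
	};

	/* min(128K, 4K) = 4K: this buffer needs small page mappings. */
	printf("alignment = 0x%llx\n",
	       (unsigned long long)buffer_alignment(chunks, 2));
	return 0;
}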
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 6feacff7..2b8b7015 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -62,6 +62,11 @@ struct nvgpu_sgt_ops {
 	u64 (*sgl_gpu_addr)(struct gk20a *g, void *sgl,
 			    struct nvgpu_gmmu_attrs *attrs);
 	/*
+	 * If left NULL then iommuable is assumed to be false.
+	 */
+	bool (*sgt_iommuable)(struct gk20a *g, struct nvgpu_sgt *sgt);
+
+	/*
 	 * Note: this operates on the whole SGT not a specific SGL entry.
 	 */
 	void (*sgt_free)(struct gk20a *g, struct nvgpu_sgt *sgt);
@@ -232,9 +237,12 @@ void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl);
 u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl);
 u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl);
 u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl);
-u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl,
+u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl,
 			   struct nvgpu_gmmu_attrs *attrs);
-void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g);
+void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt);
+
+bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt);
+u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt);
 
 /**
  * nvgpu_mem_create_from_mem - Create a new nvgpu_mem struct from an old one.
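
In vm.c the value returned by nvgpu_sgt_alignment() gates the page-size choice: large pages are only safe when the whole buffer is aligned at least to the big page size. The sketch below reconstructs that gate under stated assumptions — a hypothetical 128K big page size and a simplified two-value enum; nvgpu's actual selection helper is more involved:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for gmmu_page_size_small / gmmu_page_size_big. */
enum page_size_idx { PAGE_SIZE_SMALL, PAGE_SIZE_BIG };

/*
 * Pick a page size from a buffer's alignment. Big pages are usable only
 * when the computed alignment reaches the big page size (64K or 128K on
 * these GPUs) and big pages are not administratively disabled.
 */
static enum page_size_idx choose_page_size(uint64_t align,
					   uint64_t big_page_size,
					   bool disable_bigpage)
{
	if (disable_bigpage || align < big_page_size)
		return PAGE_SIZE_SMALL;
	return PAGE_SIZE_BIG;
}

int main(void)
{
	const uint64_t big = 128 << 10;  /* hypothetical 128K big pages */

	/* A 4K-aligned buffer must fall back to small pages. */
	printf("%d\n", choose_page_size(4 << 10, big, false));   /* 0 */
	/* A 256K-aligned buffer can use big pages. */
	printf("%d\n", choose_page_size(256 << 10, big, false)); /* 1 */
	return 0;
}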
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 498a1528..a125366a 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -107,7 +107,7 @@ u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
-	u64 addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgt->sgl, NULL);
+	u64 addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, NULL);
 	u8 prot;
 
 	gk20a_dbg_fn("");