author | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-02-26 17:37:43 -0500
---|---|---
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 21:59:26 -0400
commit | 7290a6cbd5d03145d6f1ca4c3eacba40f6d4f93c (patch) |
tree | de452c09f5eef76af273041dc64997fdc351dbd6 /drivers/gpu |
parent | bb51cf9ec6482b50f3020179965ef82f58d91a0a (diff) |
gpu: nvgpu: Implement common allocator and mem_desc
Introduce mem_desc, which holds all information needed for a buffer.
Implement helper functions for allocation and freeing that use this
data type.
Change-Id: I82c88595d058d4fb8c5c5fbf19d13269e48e422f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/712699
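For orientation, a sketch of what this commit consolidates (not part of the patch itself): the helper signatures below are copied from the mm_gk20a.c hunks in this diff, while the `struct mem_desc` layout is inferred from how its fields are used in the new code — the authoritative definition lands in mm_gk20a.h, whose hunk is not shown on this page.

```c
/* Inferred layout -- the real definition is in mm_gk20a.h (not shown
 * here). Fields are the ones the new code actually touches. */
struct mem_desc {
	void *cpu_va;           /* kernel mapping from the DMA API */
	struct sg_table *sgt;   /* backing storage, for phys/IOVA lookups */
	size_t size;            /* allocation size in bytes */
	u64 gpu_va;             /* GMMU virtual address, 0 when unmapped */
};

/* Allocation/free helpers, as added in mm_gk20a.c below: */
int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct mem_desc *mem);
int gk20a_gmmu_alloc_attr(struct gk20a *g, enum dma_attr attr, size_t size,
			  struct mem_desc *mem);
void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem);

/* Same, but additionally map the buffer into a VM's GMMU: */
int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size,
			 struct mem_desc *mem);
int gk20a_gmmu_alloc_map_attr(struct vm_gk20a *vm, enum dma_attr attr,
			      size_t size, struct mem_desc *mem);
void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct mem_desc *mem);
```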
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 75
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.h | 10
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 12
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 2
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 2
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/debug_gk20a.c | 2
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 125
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 5
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 166
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 11
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 135
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 79
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 164
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 8
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 107
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/acr_gm20b.h | 4
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 2
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 2
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 70
19 files changed, 279 insertions, 702 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index ea01914c..fb368fda 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -46,15 +46,11 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g); | |||
46 | 46 | ||
47 | static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx) | 47 | static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx) |
48 | { | 48 | { |
49 | struct device *dev = &cde_ctx->pdev->dev; | ||
50 | int i; | 49 | int i; |
51 | 50 | ||
52 | for (i = 0; i < cde_ctx->num_bufs; i++) { | 51 | for (i = 0; i < cde_ctx->num_bufs; i++) { |
53 | struct gk20a_cde_mem_desc *mem = cde_ctx->mem + i; | 52 | struct mem_desc *mem = cde_ctx->mem + i; |
54 | gk20a_gmmu_unmap(cde_ctx->vm, mem->gpu_va, mem->num_bytes, 1); | 53 | gk20a_gmmu_unmap_free(cde_ctx->vm, mem); |
55 | gk20a_free_sgtable(&mem->sgt); | ||
56 | dma_free_writecombine(dev, mem->num_bytes, mem->cpuva, | ||
57 | mem->iova); | ||
58 | } | 54 | } |
59 | 55 | ||
60 | kfree(cde_ctx->init_convert_cmd); | 56 | kfree(cde_ctx->init_convert_cmd); |
@@ -225,8 +221,7 @@ static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx, | |||
225 | const struct firmware *img, | 221 | const struct firmware *img, |
226 | struct gk20a_cde_hdr_buf *buf) | 222 | struct gk20a_cde_hdr_buf *buf) |
227 | { | 223 | { |
228 | struct device *dev = &cde_ctx->pdev->dev; | 224 | struct mem_desc *mem; |
229 | struct gk20a_cde_mem_desc *mem; | ||
230 | int err; | 225 | int err; |
231 | 226 | ||
232 | /* check that the file can hold the buf */ | 227 | /* check that the file can hold the buf */ |
@@ -246,49 +241,21 @@ static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx, | |||
246 | 241 | ||
247 | /* allocate buf */ | 242 | /* allocate buf */ |
248 | mem = cde_ctx->mem + cde_ctx->num_bufs; | 243 | mem = cde_ctx->mem + cde_ctx->num_bufs; |
249 | mem->num_bytes = buf->num_bytes; | 244 | err = gk20a_gmmu_alloc_map(cde_ctx->vm, buf->num_bytes, mem); |
250 | mem->cpuva = dma_alloc_writecombine(dev, mem->num_bytes, &mem->iova, | 245 | if (err) { |
251 | GFP_KERNEL); | ||
252 | if (!mem->cpuva) { | ||
253 | gk20a_warn(&cde_ctx->pdev->dev, "cde: could not allocate device memory. buffer idx = %d", | 246 | gk20a_warn(&cde_ctx->pdev->dev, "cde: could not allocate device memory. buffer idx = %d", |
254 | cde_ctx->num_bufs); | 247 | cde_ctx->num_bufs); |
255 | return -ENOMEM; | 248 | return -ENOMEM; |
256 | } | 249 | } |
257 | 250 | ||
258 | err = gk20a_get_sgtable(dev, &mem->sgt, mem->cpuva, mem->iova, | ||
259 | mem->num_bytes); | ||
260 | if (err) { | ||
261 | gk20a_warn(&cde_ctx->pdev->dev, "cde: could not get sg table. buffer idx = %d", | ||
262 | cde_ctx->num_bufs); | ||
263 | err = -ENOMEM; | ||
264 | goto err_get_sgtable; | ||
265 | } | ||
266 | |||
267 | mem->gpu_va = gk20a_gmmu_map(cde_ctx->vm, &mem->sgt, mem->num_bytes, | ||
268 | 0, | ||
269 | gk20a_mem_flag_none); | ||
270 | if (!mem->gpu_va) { | ||
271 | gk20a_warn(&cde_ctx->pdev->dev, "cde: could not map buffer to gpuva. buffer idx = %d", | ||
272 | cde_ctx->num_bufs); | ||
273 | err = -ENOMEM; | ||
274 | goto err_map_buffer; | ||
275 | } | ||
276 | |||
277 | /* copy the content */ | 251 | /* copy the content */ |
278 | if (buf->data_byte_offset != 0) | 252 | if (buf->data_byte_offset != 0) |
279 | memcpy(mem->cpuva, img->data + buf->data_byte_offset, | 253 | memcpy(mem->cpu_va, img->data + buf->data_byte_offset, |
280 | buf->num_bytes); | 254 | buf->num_bytes); |
281 | 255 | ||
282 | cde_ctx->num_bufs++; | 256 | cde_ctx->num_bufs++; |
283 | 257 | ||
284 | return 0; | 258 | return 0; |
285 | |||
286 | err_map_buffer: | ||
287 | gk20a_free_sgtable(&mem->sgt); | ||
288 | kfree(mem->sgt); | ||
289 | err_get_sgtable: | ||
290 | dma_free_writecombine(dev, mem->num_bytes, &mem->cpuva, mem->iova); | ||
291 | return err; | ||
292 | } | 259 | } |
293 | 260 | ||
294 | static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target, | 261 | static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target, |
@@ -340,8 +307,8 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx, | |||
340 | const struct firmware *img, | 307 | const struct firmware *img, |
341 | struct gk20a_cde_hdr_replace *replace) | 308 | struct gk20a_cde_hdr_replace *replace) |
342 | { | 309 | { |
343 | struct gk20a_cde_mem_desc *source_mem; | 310 | struct mem_desc *source_mem; |
344 | struct gk20a_cde_mem_desc *target_mem; | 311 | struct mem_desc *target_mem; |
345 | u32 *target_mem_ptr; | 312 | u32 *target_mem_ptr; |
346 | u64 vaddr; | 313 | u64 vaddr; |
347 | int err; | 314 | int err; |
@@ -356,15 +323,15 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx, | |||
356 | 323 | ||
357 | source_mem = cde_ctx->mem + replace->source_buf; | 324 | source_mem = cde_ctx->mem + replace->source_buf; |
358 | target_mem = cde_ctx->mem + replace->target_buf; | 325 | target_mem = cde_ctx->mem + replace->target_buf; |
359 | target_mem_ptr = target_mem->cpuva; | 326 | target_mem_ptr = target_mem->cpu_va; |
360 | 327 | ||
361 | if (source_mem->num_bytes < (replace->source_byte_offset + 3) || | 328 | if (source_mem->size < (replace->source_byte_offset + 3) || |
362 | target_mem->num_bytes < (replace->target_byte_offset + 3)) { | 329 | target_mem->size < (replace->target_byte_offset + 3)) { |
363 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu", | 330 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu", |
364 | replace->target_byte_offset, | 331 | replace->target_byte_offset, |
365 | replace->source_byte_offset, | 332 | replace->source_byte_offset, |
366 | source_mem->num_bytes, | 333 | source_mem->size, |
367 | target_mem->num_bytes); | 334 | target_mem->size); |
368 | return -EINVAL; | 335 | return -EINVAL; |
369 | } | 336 | } |
370 | 337 | ||
@@ -390,7 +357,7 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx, | |||
390 | static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) | 357 | static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) |
391 | { | 358 | { |
392 | struct gk20a *g = cde_ctx->g; | 359 | struct gk20a *g = cde_ctx->g; |
393 | struct gk20a_cde_mem_desc *target_mem; | 360 | struct mem_desc *target_mem; |
394 | u32 *target_mem_ptr; | 361 | u32 *target_mem_ptr; |
395 | u64 new_data; | 362 | u64 new_data; |
396 | int user_id = 0, i, err; | 363 | int user_id = 0, i, err; |
@@ -398,7 +365,7 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) | |||
398 | for (i = 0; i < cde_ctx->num_params; i++) { | 365 | for (i = 0; i < cde_ctx->num_params; i++) { |
399 | struct gk20a_cde_hdr_param *param = cde_ctx->params + i; | 366 | struct gk20a_cde_hdr_param *param = cde_ctx->params + i; |
400 | target_mem = cde_ctx->mem + param->target_buf; | 367 | target_mem = cde_ctx->mem + param->target_buf; |
401 | target_mem_ptr = target_mem->cpuva; | 368 | target_mem_ptr = target_mem->cpu_va; |
402 | target_mem_ptr += (param->target_byte_offset / sizeof(u32)); | 369 | target_mem_ptr += (param->target_byte_offset / sizeof(u32)); |
403 | 370 | ||
404 | switch (param->id) { | 371 | switch (param->id) { |
@@ -472,7 +439,7 @@ static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx, | |||
472 | const struct firmware *img, | 439 | const struct firmware *img, |
473 | struct gk20a_cde_hdr_param *param) | 440 | struct gk20a_cde_hdr_param *param) |
474 | { | 441 | { |
475 | struct gk20a_cde_mem_desc *target_mem; | 442 | struct mem_desc *target_mem; |
476 | 443 | ||
477 | if (param->target_buf >= cde_ctx->num_bufs) { | 444 | if (param->target_buf >= cde_ctx->num_bufs) { |
478 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u", | 445 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u", |
@@ -482,10 +449,10 @@ static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx, | |||
482 | } | 449 | } |
483 | 450 | ||
484 | target_mem = cde_ctx->mem + param->target_buf; | 451 | target_mem = cde_ctx->mem + param->target_buf; |
485 | if (target_mem->num_bytes < (param->target_byte_offset + 3)) { | 452 | if (target_mem->size < (param->target_byte_offset + 3)) {
486 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu", | 453 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu", |
487 | cde_ctx->num_params, param->target_byte_offset, | 454 | cde_ctx->num_params, param->target_byte_offset, |
488 | target_mem->num_bytes); | 455 | target_mem->size); |
489 | return -EINVAL; | 456 | return -EINVAL; |
490 | } | 457 | } |
491 | 458 | ||
@@ -563,7 +530,7 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx, | |||
563 | 530 | ||
564 | gpfifo_elem = *gpfifo; | 531 | gpfifo_elem = *gpfifo; |
565 | for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) { | 532 | for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) { |
566 | struct gk20a_cde_mem_desc *target_mem; | 533 | struct mem_desc *target_mem; |
567 | 534 | ||
568 | /* validate the current entry */ | 535 | /* validate the current entry */ |
569 | if (cmd_elem->target_buf >= cde_ctx->num_bufs) { | 536 | if (cmd_elem->target_buf >= cde_ctx->num_bufs) { |
@@ -573,10 +540,10 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx, | |||
573 | } | 540 | } |
574 | 541 | ||
575 | target_mem = cde_ctx->mem + cmd_elem->target_buf; | 542 | target_mem = cde_ctx->mem + cmd_elem->target_buf; |
576 | if (target_mem->num_bytes < | 543 | if (target_mem->size <
577 | cmd_elem->target_byte_offset + cmd_elem->num_bytes) { | 544 | cmd_elem->target_byte_offset + cmd_elem->num_bytes) { |
578 | gk20a_warn(&cde_ctx->pdev->dev, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)", | 545 | gk20a_warn(&cde_ctx->pdev->dev, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)", |
579 | target_mem->num_bytes, | 546 | target_mem->size, |
580 | cmd_elem->target_byte_offset, | 547 | cmd_elem->target_byte_offset, |
581 | cmd_elem->num_bytes); | 548 | cmd_elem->num_bytes); |
582 | return -EINVAL; | 549 | return -EINVAL; |
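The cde hunks above show the pattern this commit repeats in every file: an open-coded allocate / build-sg-table / GMMU-map sequence with multi-label error unwinding collapses into one helper call, and the matching three-step teardown into another. Condensed from the hunks above (elisions marked; an excerpt, not a standalone compilable unit):

```c
/* Before: each caller tracked cpuva, iova, sgt and gpu_va by hand. */
mem->num_bytes = buf->num_bytes;
mem->cpuva = dma_alloc_writecombine(dev, mem->num_bytes, &mem->iova,
				    GFP_KERNEL);
/* ... */
err = gk20a_get_sgtable(dev, &mem->sgt, mem->cpuva, mem->iova,
			mem->num_bytes);
/* ... */
mem->gpu_va = gk20a_gmmu_map(cde_ctx->vm, &mem->sgt, mem->num_bytes,
			     0, gk20a_mem_flag_none);
/* ... plus the err_get_sgtable / err_map_buffer unwind labels. */

/* After: one call allocates, builds the sg table and maps the buffer;
 * one call undoes all three. */
err = gk20a_gmmu_alloc_map(cde_ctx->vm, buf->num_bytes, mem);
/* ... */
gk20a_gmmu_unmap_free(cde_ctx->vm, mem);
```

One behavioral nuance worth noting: the old cde path allocated write-combined memory, while the common helper uses dma_alloc_coherent() (or dma_alloc_attrs() when an attribute is passed), so these buffers' caching attributes change along with the refactor.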
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
index 58480d26..a5c75ae8 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
@@ -205,14 +205,6 @@ enum { | |||
205 | TYPE_ARRAY | 205 | TYPE_ARRAY |
206 | }; | 206 | }; |
207 | 207 | ||
208 | struct gk20a_cde_mem_desc { | ||
209 | struct sg_table *sgt; | ||
210 | dma_addr_t iova; | ||
211 | void *cpuva; | ||
212 | size_t num_bytes; | ||
213 | u64 gpu_va; | ||
214 | }; | ||
215 | |||
216 | struct gk20a_cde_param { | 208 | struct gk20a_cde_param { |
217 | u32 id; | 209 | u32 id; |
218 | u32 padding; | 210 | u32 padding; |
@@ -228,7 +220,7 @@ struct gk20a_cde_ctx { | |||
228 | struct vm_gk20a *vm; | 220 | struct vm_gk20a *vm; |
229 | 221 | ||
230 | /* buf converter configuration */ | 222 | /* buf converter configuration */ |
231 | struct gk20a_cde_mem_desc mem[MAX_CDE_BUFS]; | 223 | struct mem_desc mem[MAX_CDE_BUFS]; |
232 | int num_bufs; | 224 | int num_bufs; |
233 | 225 | ||
234 | /* buffer patching params (where should patching be done) */ | 226 | /* buffer patching params (where should patching be done) */ |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 62092930..9a0800d1 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -89,7 +89,7 @@ int channel_gk20a_commit_va(struct channel_gk20a *c) | |||
89 | { | 89 | { |
90 | gk20a_dbg_fn(""); | 90 | gk20a_dbg_fn(""); |
91 | 91 | ||
92 | if (!c->inst_block.cpuva) | 92 | if (!c->inst_block.cpu_va) |
93 | return -ENOMEM; | 93 | return -ENOMEM; |
94 | 94 | ||
95 | gk20a_init_inst_block(&c->inst_block, c->vm, | 95 | gk20a_init_inst_block(&c->inst_block, c->vm, |
@@ -106,7 +106,7 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c) | |||
106 | 106 | ||
107 | gk20a_dbg_fn(""); | 107 | gk20a_dbg_fn(""); |
108 | 108 | ||
109 | inst_ptr = c->inst_block.cpuva; | 109 | inst_ptr = c->inst_block.cpu_va; |
110 | if (!inst_ptr) | 110 | if (!inst_ptr) |
111 | return -ENOMEM; | 111 | return -ENOMEM; |
112 | 112 | ||
@@ -134,7 +134,7 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, | |||
134 | int shift = 3; | 134 | int shift = 3; |
135 | int value = timeslice_timeout; | 135 | int value = timeslice_timeout; |
136 | 136 | ||
137 | inst_ptr = c->inst_block.cpuva; | 137 | inst_ptr = c->inst_block.cpu_va; |
138 | if (!inst_ptr) | 138 | if (!inst_ptr) |
139 | return -ENOMEM; | 139 | return -ENOMEM; |
140 | 140 | ||
@@ -177,7 +177,7 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | |||
177 | 177 | ||
178 | gk20a_dbg_fn(""); | 178 | gk20a_dbg_fn(""); |
179 | 179 | ||
180 | inst_ptr = c->inst_block.cpuva; | 180 | inst_ptr = c->inst_block.cpu_va; |
181 | if (!inst_ptr) | 181 | if (!inst_ptr) |
182 | return -ENOMEM; | 182 | return -ENOMEM; |
183 | 183 | ||
@@ -263,7 +263,7 @@ static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a) | |||
263 | struct fifo_engine_info_gk20a *engine_info = | 263 | struct fifo_engine_info_gk20a *engine_info = |
264 | f->engine_info + ENGINE_GR_GK20A; | 264 | f->engine_info + ENGINE_GR_GK20A; |
265 | 265 | ||
266 | u32 inst_ptr = ch_gk20a->inst_block.cpu_pa | 266 | u32 inst_ptr = sg_phys(ch_gk20a->inst_block.sgt->sgl) |
267 | >> ram_in_base_shift_v(); | 267 | >> ram_in_base_shift_v(); |
268 | 268 | ||
269 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", | 269 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", |
@@ -322,7 +322,7 @@ int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) | |||
322 | return err; | 322 | return err; |
323 | 323 | ||
324 | gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx", | 324 | gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx", |
325 | ch->hw_chid, (u64)ch->inst_block.cpu_pa); | 325 | ch->hw_chid, (u64)sg_phys(ch->inst_block.sgt->sgl)); |
326 | 326 | ||
327 | gk20a_dbg_fn("done"); | 327 | gk20a_dbg_fn("done"); |
328 | return 0; | 328 | return 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 034de53f..ddb91f9b 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -99,7 +99,7 @@ struct channel_gk20a { | |||
99 | 99 | ||
100 | struct channel_ctx_gk20a ch_ctx; | 100 | struct channel_ctx_gk20a ch_ctx; |
101 | 101 | ||
102 | struct inst_desc inst_block; | 102 | struct mem_desc inst_block; |
103 | struct mem_desc_sub ramfc; | 103 | struct mem_desc_sub ramfc; |
104 | 104 | ||
105 | void *userd_cpu_va; | 105 | void *userd_cpu_va; |
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 7cda9949..217f0056 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -851,7 +851,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, | |||
851 | gk20a_writel(g, perf_pmasys_outsize_r(), virt_size); | 851 | gk20a_writel(g, perf_pmasys_outsize_r(), virt_size); |
852 | 852 | ||
853 | /* this field is aligned to 4K */ | 853 | /* this field is aligned to 4K */ |
854 | inst_pa_page = g->mm.hwpm.inst_block.cpu_pa >> 12; | 854 | inst_pa_page = gk20a_mem_phys(&g->mm.hwpm.inst_block) >> 12; |
855 | 855 | ||
856 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK | 856 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK |
857 | * should be written last */ | 857 | * should be written last */ |
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index 9dfab370..ace05c07 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -104,7 +104,7 @@ static void gk20a_debug_show_channel(struct gk20a *g, | |||
104 | u32 syncpointa, syncpointb; | 104 | u32 syncpointa, syncpointb; |
105 | void *inst_ptr; | 105 | void *inst_ptr; |
106 | 106 | ||
107 | inst_ptr = ch->inst_block.cpuva; | 107 | inst_ptr = ch->inst_block.cpu_va; |
108 | if (!inst_ptr) | 108 | if (!inst_ptr) |
109 | return; | 109 | return; |
110 | 110 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index cf1242ab..dee58d0a 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -159,7 +159,6 @@ u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g) | |||
159 | static void gk20a_remove_fifo_support(struct fifo_gk20a *f) | 159 | static void gk20a_remove_fifo_support(struct fifo_gk20a *f) |
160 | { | 160 | { |
161 | struct gk20a *g = f->g; | 161 | struct gk20a *g = f->g; |
162 | struct device *d = dev_from_gk20a(g); | ||
163 | struct fifo_engine_info_gk20a *engine_info; | 162 | struct fifo_engine_info_gk20a *engine_info; |
164 | struct fifo_runlist_info_gk20a *runlist; | 163 | struct fifo_runlist_info_gk20a *runlist; |
165 | u32 runlist_id; | 164 | u32 runlist_id; |
@@ -175,36 +174,14 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f) | |||
175 | } | 174 | } |
176 | kfree(f->channel); | 175 | kfree(f->channel); |
177 | } | 176 | } |
178 | if (f->userd.gpu_va) | 177 | gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd); |
179 | gk20a_gmmu_unmap(&g->mm.bar1.vm, | ||
180 | f->userd.gpu_va, | ||
181 | f->userd.size, | ||
182 | gk20a_mem_flag_none); | ||
183 | |||
184 | if (f->userd.sgt) | ||
185 | gk20a_free_sgtable(&f->userd.sgt); | ||
186 | |||
187 | if (f->userd.cpuva) | ||
188 | dma_free_coherent(d, | ||
189 | f->userd_total_size, | ||
190 | f->userd.cpuva, | ||
191 | f->userd.iova); | ||
192 | f->userd.cpuva = NULL; | ||
193 | f->userd.iova = 0; | ||
194 | 178 | ||
195 | engine_info = f->engine_info + ENGINE_GR_GK20A; | 179 | engine_info = f->engine_info + ENGINE_GR_GK20A; |
196 | runlist_id = engine_info->runlist_id; | 180 | runlist_id = engine_info->runlist_id; |
197 | runlist = &f->runlist_info[runlist_id]; | 181 | runlist = &f->runlist_info[runlist_id]; |
198 | 182 | ||
199 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 183 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) |
200 | if (runlist->mem[i].cpuva) | 184 | gk20a_gmmu_free(g, &runlist->mem[i]); |
201 | dma_free_coherent(d, | ||
202 | runlist->mem[i].size, | ||
203 | runlist->mem[i].cpuva, | ||
204 | runlist->mem[i].iova); | ||
205 | runlist->mem[i].cpuva = NULL; | ||
206 | runlist->mem[i].iova = 0; | ||
207 | } | ||
208 | 185 | ||
209 | kfree(runlist->active_channels); | 186 | kfree(runlist->active_channels); |
210 | kfree(runlist->active_tsgs); | 187 | kfree(runlist->active_tsgs); |
@@ -327,19 +304,11 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
327 | 304 | ||
328 | runlist_size = ram_rl_entry_size_v() * f->num_channels; | 305 | runlist_size = ram_rl_entry_size_v() * f->num_channels; |
329 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 306 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { |
330 | dma_addr_t iova; | 307 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); |
331 | 308 | if (err) { | |
332 | runlist->mem[i].cpuva = | ||
333 | dma_alloc_coherent(d, | ||
334 | runlist_size, | ||
335 | &iova, | ||
336 | GFP_KERNEL); | ||
337 | if (!runlist->mem[i].cpuva) { | ||
338 | dev_err(d, "memory allocation failed\n"); | 309 | dev_err(d, "memory allocation failed\n"); |
339 | goto clean_up_runlist; | 310 | goto clean_up_runlist; |
340 | } | 311 | } |
341 | runlist->mem[i].iova = iova; | ||
342 | runlist->mem[i].size = runlist_size; | ||
343 | } | 312 | } |
344 | mutex_init(&runlist->mutex); | 313 | mutex_init(&runlist->mutex); |
345 | 314 | ||
@@ -351,15 +320,8 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
351 | return 0; | 320 | return 0; |
352 | 321 | ||
353 | clean_up_runlist: | 322 | clean_up_runlist: |
354 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 323 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) |
355 | if (runlist->mem[i].cpuva) | 324 | gk20a_gmmu_free(g, &runlist->mem[i]); |
356 | dma_free_coherent(d, | ||
357 | runlist->mem[i].size, | ||
358 | runlist->mem[i].cpuva, | ||
359 | runlist->mem[i].iova); | ||
360 | runlist->mem[i].cpuva = NULL; | ||
361 | runlist->mem[i].iova = 0; | ||
362 | } | ||
363 | 325 | ||
364 | kfree(runlist->active_channels); | 326 | kfree(runlist->active_channels); |
365 | runlist->active_channels = NULL; | 327 | runlist->active_channels = NULL; |
@@ -502,7 +464,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) | |||
502 | struct fifo_gk20a *f = &g->fifo; | 464 | struct fifo_gk20a *f = &g->fifo; |
503 | struct device *d = dev_from_gk20a(g); | 465 | struct device *d = dev_from_gk20a(g); |
504 | int chid, i, err = 0; | 466 | int chid, i, err = 0; |
505 | dma_addr_t iova; | ||
506 | 467 | ||
507 | gk20a_dbg_fn(""); | 468 | gk20a_dbg_fn(""); |
508 | 469 | ||
@@ -521,43 +482,17 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) | |||
521 | f->max_engines = ENGINE_INVAL_GK20A; | 482 | f->max_engines = ENGINE_INVAL_GK20A; |
522 | 483 | ||
523 | f->userd_entry_size = 1 << ram_userd_base_shift_v(); | 484 | f->userd_entry_size = 1 << ram_userd_base_shift_v(); |
524 | f->userd_total_size = f->userd_entry_size * f->num_channels; | ||
525 | 485 | ||
526 | f->userd.cpuva = dma_alloc_coherent(d, | 486 | err = gk20a_gmmu_alloc_map(&g->mm.bar1.vm, |
527 | f->userd_total_size, | 487 | f->userd_entry_size * f->num_channels, |
528 | &iova, | 488 | &f->userd); |
529 | GFP_KERNEL); | ||
530 | if (!f->userd.cpuva) { | ||
531 | dev_err(d, "memory allocation failed\n"); | ||
532 | err = -ENOMEM; | ||
533 | goto clean_up; | ||
534 | } | ||
535 | |||
536 | f->userd.iova = iova; | ||
537 | err = gk20a_get_sgtable(d, &f->userd.sgt, | ||
538 | f->userd.cpuva, f->userd.iova, | ||
539 | f->userd_total_size); | ||
540 | if (err) { | 489 | if (err) { |
541 | dev_err(d, "failed to create sg table\n"); | 490 | dev_err(d, "memory allocation failed\n"); |
542 | goto clean_up; | ||
543 | } | ||
544 | |||
545 | /* bar1 va */ | ||
546 | f->userd.gpu_va = gk20a_gmmu_map(&g->mm.bar1.vm, | ||
547 | &f->userd.sgt, | ||
548 | f->userd_total_size, | ||
549 | 0, /* flags */ | ||
550 | gk20a_mem_flag_none); | ||
551 | if (!f->userd.gpu_va) { | ||
552 | dev_err(d, "gmmu mapping failed\n"); | ||
553 | err = -ENOMEM; | ||
554 | goto clean_up; | 491 | goto clean_up; |
555 | } | 492 | } |
556 | 493 | ||
557 | gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); | 494 | gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); |
558 | 495 | ||
559 | f->userd.size = f->userd_total_size; | ||
560 | |||
561 | f->channel = kzalloc(f->num_channels * sizeof(*f->channel), | 496 | f->channel = kzalloc(f->num_channels * sizeof(*f->channel), |
562 | GFP_KERNEL); | 497 | GFP_KERNEL); |
563 | f->tsg = kzalloc(f->num_channels * sizeof(*f->tsg), | 498 | f->tsg = kzalloc(f->num_channels * sizeof(*f->tsg), |
@@ -582,9 +517,9 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) | |||
582 | 517 | ||
583 | for (chid = 0; chid < f->num_channels; chid++) { | 518 | for (chid = 0; chid < f->num_channels; chid++) { |
584 | f->channel[chid].userd_cpu_va = | 519 | f->channel[chid].userd_cpu_va = |
585 | f->userd.cpuva + chid * f->userd_entry_size; | 520 | f->userd.cpu_va + chid * f->userd_entry_size; |
586 | f->channel[chid].userd_iova = | 521 | f->channel[chid].userd_iova = |
587 | gk20a_mm_smmu_vaddr_translate(g, f->userd.iova) | 522 | gk20a_mm_iova_addr(g, f->userd.sgt->sgl) |
588 | + chid * f->userd_entry_size; | 523 | + chid * f->userd_entry_size; |
589 | f->channel[chid].userd_gpu_va = | 524 | f->channel[chid].userd_gpu_va = |
590 | f->userd.gpu_va + chid * f->userd_entry_size; | 525 | f->userd.gpu_va + chid * f->userd_entry_size; |
@@ -607,22 +542,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) | |||
607 | 542 | ||
608 | clean_up: | 543 | clean_up: |
609 | gk20a_dbg_fn("fail"); | 544 | gk20a_dbg_fn("fail"); |
610 | if (f->userd.gpu_va) | 545 | gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd); |
611 | gk20a_gmmu_unmap(&g->mm.bar1.vm, | ||
612 | f->userd.gpu_va, | ||
613 | f->userd.size, | ||
614 | gk20a_mem_flag_none); | ||
615 | if (f->userd.sgt) | ||
616 | gk20a_free_sgtable(&f->userd.sgt); | ||
617 | if (f->userd.cpuva) | ||
618 | dma_free_coherent(d, | ||
619 | f->userd_total_size, | ||
620 | f->userd.cpuva, | ||
621 | f->userd.iova); | ||
622 | f->userd.cpuva = NULL; | ||
623 | f->userd.iova = 0; | ||
624 | |||
625 | memset(&f->userd, 0, sizeof(struct userd_desc)); | ||
626 | 546 | ||
627 | kfree(f->channel); | 547 | kfree(f->channel); |
628 | f->channel = NULL; | 548 | f->channel = NULL; |
@@ -657,7 +577,7 @@ static int gk20a_init_fifo_setup_hw(struct gk20a *g) | |||
657 | u32 v, v1 = 0x33, v2 = 0x55; | 577 | u32 v, v1 = 0x33, v2 = 0x55; |
658 | 578 | ||
659 | u32 bar1_vaddr = f->userd.gpu_va; | 579 | u32 bar1_vaddr = f->userd.gpu_va; |
660 | volatile u32 *cpu_vaddr = f->userd.cpuva; | 580 | volatile u32 *cpu_vaddr = f->userd.cpu_va; |
661 | 581 | ||
662 | gk20a_dbg_info("test bar1 @ vaddr 0x%x", | 582 | gk20a_dbg_info("test bar1 @ vaddr 0x%x", |
663 | bar1_vaddr); | 583 | bar1_vaddr); |
@@ -725,8 +645,8 @@ channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr) | |||
725 | return NULL; | 645 | return NULL; |
726 | for (ci = 0; ci < f->num_channels; ci++) { | 646 | for (ci = 0; ci < f->num_channels; ci++) { |
727 | struct channel_gk20a *c = f->channel+ci; | 647 | struct channel_gk20a *c = f->channel+ci; |
728 | if (c->inst_block.cpuva && | 648 | if (c->inst_block.cpu_va && |
729 | (inst_ptr == c->inst_block.cpu_pa)) | 649 | (inst_ptr == sg_phys(c->inst_block.sgt->sgl))) |
730 | return f->channel+ci; | 650 | return f->channel+ci; |
731 | } | 651 | } |
732 | return NULL; | 652 | return NULL; |
@@ -1082,10 +1002,10 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) | |||
1082 | gk20a_fifo_set_ctx_mmu_error_ch(g, ch); | 1002 | gk20a_fifo_set_ctx_mmu_error_ch(g, ch); |
1083 | gk20a_channel_abort(ch); | 1003 | gk20a_channel_abort(ch); |
1084 | } else if (f.inst_ptr == | 1004 | } else if (f.inst_ptr == |
1085 | g->mm.bar1.inst_block.cpu_pa) { | 1005 | sg_phys(g->mm.bar1.inst_block.sgt->sgl)) { |
1086 | gk20a_err(dev_from_gk20a(g), "mmu fault from bar1"); | 1006 | gk20a_err(dev_from_gk20a(g), "mmu fault from bar1"); |
1087 | } else if (f.inst_ptr == | 1007 | } else if (f.inst_ptr == |
1088 | g->mm.pmu.inst_block.cpu_pa) { | 1008 | sg_phys(g->mm.pmu.inst_block.sgt->sgl)) { |
1089 | gk20a_err(dev_from_gk20a(g), "mmu fault from pmu"); | 1009 | gk20a_err(dev_from_gk20a(g), "mmu fault from pmu"); |
1090 | } else | 1010 | } else |
1091 | gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault"); | 1011 | gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault"); |
@@ -1893,7 +1813,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
1893 | bool wait_for_finish) | 1813 | bool wait_for_finish) |
1894 | { | 1814 | { |
1895 | u32 ret = 0; | 1815 | u32 ret = 0; |
1896 | struct device *d = dev_from_gk20a(g); | ||
1897 | struct fifo_gk20a *f = &g->fifo; | 1816 | struct fifo_gk20a *f = &g->fifo; |
1898 | struct fifo_runlist_info_gk20a *runlist = NULL; | 1817 | struct fifo_runlist_info_gk20a *runlist = NULL; |
1899 | u32 *runlist_entry_base = NULL; | 1818 | u32 *runlist_entry_base = NULL; |
@@ -1935,15 +1854,15 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
1935 | new_buf = !runlist->cur_buffer; | 1854 | new_buf = !runlist->cur_buffer; |
1936 | 1855 | ||
1937 | gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx", | 1856 | gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx", |
1938 | runlist_id, runlist->mem[new_buf].iova); | 1857 | runlist_id, (u64)gk20a_mem_phys(&runlist->mem[new_buf])); |
1939 | 1858 | ||
1940 | runlist_pa = gk20a_get_phys_from_iova(d, runlist->mem[new_buf].iova); | 1859 | runlist_pa = gk20a_mem_phys(&runlist->mem[new_buf]); |
1941 | if (!runlist_pa) { | 1860 | if (!runlist_pa) { |
1942 | ret = -EINVAL; | 1861 | ret = -EINVAL; |
1943 | goto clean_up; | 1862 | goto clean_up; |
1944 | } | 1863 | } |
1945 | 1864 | ||
1946 | runlist_entry_base = runlist->mem[new_buf].cpuva; | 1865 | runlist_entry_base = runlist->mem[new_buf].cpu_va; |
1947 | if (!runlist_entry_base) { | 1866 | if (!runlist_entry_base) { |
1948 | ret = -ENOMEM; | 1867 | ret = -ENOMEM; |
1949 | goto clean_up; | 1868 | goto clean_up; |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 4ff1398a..dd320ae1 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -32,7 +32,7 @@ struct fifo_runlist_info_gk20a { | |||
32 | unsigned long *active_channels; | 32 | unsigned long *active_channels; |
33 | unsigned long *active_tsgs; | 33 | unsigned long *active_tsgs; |
34 | /* Each engine has its own SW and HW runlist buffer.*/ | 34 | /* Each engine has its own SW and HW runlist buffer.*/ |
35 | struct runlist_mem_desc mem[MAX_RUNLIST_BUFFERS]; | 35 | struct mem_desc mem[MAX_RUNLIST_BUFFERS]; |
36 | u32 cur_buffer; | 36 | u32 cur_buffer; |
37 | u32 total_entries; | 37 | u32 total_entries; |
38 | bool stopped; | 38 | bool stopped; |
@@ -102,9 +102,8 @@ struct fifo_gk20a { | |||
102 | struct fifo_runlist_info_gk20a *runlist_info; | 102 | struct fifo_runlist_info_gk20a *runlist_info; |
103 | u32 max_runlists; | 103 | u32 max_runlists; |
104 | 104 | ||
105 | struct userd_desc userd; | 105 | struct mem_desc userd; |
106 | u32 userd_entry_size; | 106 | u32 userd_entry_size; |
107 | u32 userd_total_size; | ||
108 | 107 | ||
109 | struct channel_gk20a *channel; | 108 | struct channel_gk20a *channel; |
110 | struct mutex ch_inuse_mutex; /* protect unused chid look up */ | 109 | struct mutex ch_inuse_mutex; /* protect unused chid look up */ |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index e9b209c4..a160942f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -567,7 +567,7 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) | |||
567 | 567 | ||
568 | gk20a_dbg_fn(""); | 568 | gk20a_dbg_fn(""); |
569 | 569 | ||
570 | inst_ptr = c->inst_block.cpuva; | 570 | inst_ptr = c->inst_block.cpu_va; |
571 | if (!inst_ptr) | 571 | if (!inst_ptr) |
572 | return -ENOMEM; | 572 | return -ENOMEM; |
573 | 573 | ||
@@ -674,7 +674,7 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g, | |||
674 | static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, | 674 | static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, |
675 | struct channel_gk20a *c) | 675 | struct channel_gk20a *c) |
676 | { | 676 | { |
677 | u32 inst_base_ptr = u64_lo32(c->inst_block.cpu_pa | 677 | u32 inst_base_ptr = u64_lo32(gk20a_mem_phys(&c->inst_block) |
678 | >> ram_in_base_shift_v()); | 678 | >> ram_in_base_shift_v()); |
679 | u32 ret; | 679 | u32 ret; |
680 | 680 | ||
@@ -1375,7 +1375,7 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type) | |||
1375 | int ret; | 1375 | int ret; |
1376 | 1376 | ||
1377 | u32 inst_base_ptr = | 1377 | u32 inst_base_ptr = |
1378 | u64_lo32(c->inst_block.cpu_pa | 1378 | u64_lo32(gk20a_mem_phys(&c->inst_block) |
1379 | >> ram_in_base_shift_v()); | 1379 | >> ram_in_base_shift_v()); |
1380 | 1380 | ||
1381 | 1381 | ||
@@ -1671,7 +1671,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1671 | 1671 | ||
1672 | if (tegra_platform_is_linsim()) { | 1672 | if (tegra_platform_is_linsim()) { |
1673 | u32 inst_base_ptr = | 1673 | u32 inst_base_ptr = |
1674 | u64_lo32(c->inst_block.cpu_pa | 1674 | u64_lo32(gk20a_mem_phys(&c->inst_block) |
1675 | >> ram_in_base_shift_v()); | 1675 | >> ram_in_base_shift_v()); |
1676 | 1676 | ||
1677 | ret = gr_gk20a_submit_fecs_method_op(g, | 1677 | ret = gr_gk20a_submit_fecs_method_op(g, |
@@ -1729,12 +1729,12 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) | |||
1729 | gk20a_init_inst_block(&ucode_info->inst_blk_desc, vm, 0); | 1729 | gk20a_init_inst_block(&ucode_info->inst_blk_desc, vm, 0); |
1730 | 1730 | ||
1731 | /* Map ucode surface to GMMU */ | 1731 | /* Map ucode surface to GMMU */ |
1732 | ucode_info->ucode_gpuva = gk20a_gmmu_map(vm, | 1732 | ucode_info->surface_desc.gpu_va = gk20a_gmmu_map(vm, |
1733 | &ucode_info->surface_desc.sgt, | 1733 | &ucode_info->surface_desc.sgt, |
1734 | ucode_info->surface_desc.size, | 1734 | ucode_info->surface_desc.size, |
1735 | 0, /* flags */ | 1735 | 0, /* flags */ |
1736 | gk20a_mem_flag_read_only); | 1736 | gk20a_mem_flag_read_only); |
1737 | if (!ucode_info->ucode_gpuva) { | 1737 | if (!ucode_info->surface_desc.gpu_va) { |
1738 | gk20a_err(d, "failed to update gmmu ptes\n"); | 1738 | gk20a_err(d, "failed to update gmmu ptes\n"); |
1739 | return -ENOMEM; | 1739 | return -ENOMEM; |
1740 | } | 1740 | } |
@@ -1798,8 +1798,6 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
1798 | u8 *buf; | 1798 | u8 *buf; |
1799 | u32 ucode_size; | 1799 | u32 ucode_size; |
1800 | int err = 0; | 1800 | int err = 0; |
1801 | dma_addr_t iova; | ||
1802 | DEFINE_DMA_ATTRS(attrs); | ||
1803 | 1801 | ||
1804 | fecs_fw = gk20a_request_firmware(g, GK20A_FECS_UCODE_IMAGE); | 1802 | fecs_fw = gk20a_request_firmware(g, GK20A_FECS_UCODE_IMAGE); |
1805 | if (!fecs_fw) { | 1803 | if (!fecs_fw) { |
@@ -1832,30 +1830,12 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
1832 | g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32), | 1830 | g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32), |
1833 | g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); | 1831 | g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); |
1834 | 1832 | ||
1835 | ucode_info->surface_desc.size = ucode_size; | 1833 | err = gk20a_gmmu_alloc_attr(g, DMA_ATTR_READ_ONLY, ucode_size, |
1836 | dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); | 1834 | &ucode_info->surface_desc); |
1837 | ucode_info->surface_desc.cpuva = dma_alloc_attrs(d, | 1835 | if (err) |
1838 | ucode_info->surface_desc.size, | ||
1839 | &iova, | ||
1840 | GFP_KERNEL, | ||
1841 | &attrs); | ||
1842 | if (!ucode_info->surface_desc.cpuva) { | ||
1843 | gk20a_err(d, "memory allocation failed\n"); | ||
1844 | err = -ENOMEM; | ||
1845 | goto clean_up; | ||
1846 | } | ||
1847 | |||
1848 | ucode_info->surface_desc.iova = iova; | ||
1849 | err = gk20a_get_sgtable(d, &ucode_info->surface_desc.sgt, | ||
1850 | ucode_info->surface_desc.cpuva, | ||
1851 | ucode_info->surface_desc.iova, | ||
1852 | ucode_info->surface_desc.size); | ||
1853 | if (err) { | ||
1854 | gk20a_err(d, "failed to create sg table\n"); | ||
1855 | goto clean_up; | 1836 | goto clean_up; |
1856 | } | ||
1857 | 1837 | ||
1858 | buf = (u8 *)ucode_info->surface_desc.cpuva; | 1838 | buf = (u8 *)ucode_info->surface_desc.cpu_va; |
1859 | if (!buf) { | 1839 | if (!buf) { |
1860 | gk20a_err(d, "failed to map surface desc buffer"); | 1840 | gk20a_err(d, "failed to map surface desc buffer"); |
1861 | err = -ENOMEM; | 1841 | err = -ENOMEM; |
@@ -1882,23 +1862,13 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
1882 | if (err) | 1862 | if (err) |
1883 | goto clean_up; | 1863 | goto clean_up; |
1884 | 1864 | ||
1885 | gk20a_free_sgtable(&ucode_info->surface_desc.sgt); | ||
1886 | |||
1887 | return 0; | 1865 | return 0; |
1888 | 1866 | ||
1889 | clean_up: | 1867 | clean_up: |
1890 | if (ucode_info->ucode_gpuva) | 1868 | if (ucode_info->surface_desc.gpu_va) |
1891 | gk20a_gmmu_unmap(vm, ucode_info->ucode_gpuva, | 1869 | gk20a_gmmu_unmap(vm, ucode_info->surface_desc.gpu_va, |
1892 | ucode_info->surface_desc.size, gk20a_mem_flag_none); | 1870 | ucode_info->surface_desc.size, gk20a_mem_flag_none); |
1893 | if (ucode_info->surface_desc.sgt) | 1871 | gk20a_gmmu_free(g, &ucode_info->surface_desc); |
1894 | gk20a_free_sgtable(&ucode_info->surface_desc.sgt); | ||
1895 | if (ucode_info->surface_desc.cpuva) | ||
1896 | dma_free_attrs(d, ucode_info->surface_desc.size, | ||
1897 | ucode_info->surface_desc.cpuva, | ||
1898 | ucode_info->surface_desc.iova, | ||
1899 | &attrs); | ||
1900 | ucode_info->surface_desc.cpuva = NULL; | ||
1901 | ucode_info->surface_desc.iova = 0; | ||
1902 | 1872 | ||
1903 | release_firmware(gpccs_fw); | 1873 | release_firmware(gpccs_fw); |
1904 | gpccs_fw = NULL; | 1874 | gpccs_fw = NULL; |
@@ -1928,7 +1898,7 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) | |||
1928 | 1898 | ||
1929 | gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); | 1899 | gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); |
1930 | 1900 | ||
1931 | inst_ptr = ucode_info->inst_blk_desc.cpu_pa; | 1901 | inst_ptr = gk20a_mem_phys(&ucode_info->inst_blk_desc); |
1932 | gk20a_writel(g, gr_fecs_new_ctx_r(), | 1902 | gk20a_writel(g, gr_fecs_new_ctx_r(), |
1933 | gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | | 1903 | gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | |
1934 | gr_fecs_new_ctx_target_m() | | 1904 | gr_fecs_new_ctx_target_m() | |
@@ -2111,7 +2081,7 @@ static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | |||
2111 | static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) | 2081 | static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) |
2112 | { | 2082 | { |
2113 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; | 2083 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; |
2114 | u64 addr_base = ucode_info->ucode_gpuva; | 2084 | u64 addr_base = ucode_info->surface_desc.gpu_va; |
2115 | 2085 | ||
2116 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0); | 2086 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0); |
2117 | 2087 | ||
@@ -2128,6 +2098,7 @@ static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) | |||
2128 | 2098 | ||
2129 | int gr_gk20a_load_ctxsw_ucode(struct gk20a *g) | 2099 | int gr_gk20a_load_ctxsw_ucode(struct gk20a *g) |
2130 | { | 2100 | { |
2101 | int err; | ||
2131 | 2102 | ||
2132 | gk20a_dbg_fn(""); | 2103 | gk20a_dbg_fn(""); |
2133 | 2104 | ||
@@ -2147,8 +2118,12 @@ int gr_gk20a_load_ctxsw_ucode(struct gk20a *g) | |||
2147 | gr_gk20a_load_falcon_imem(g); | 2118 | gr_gk20a_load_falcon_imem(g); |
2148 | gr_gk20a_start_falcon_ucode(g); | 2119 | gr_gk20a_start_falcon_ucode(g); |
2149 | } else { | 2120 | } else { |
2150 | if (!g->gr.skip_ucode_init) | 2121 | if (!g->gr.skip_ucode_init) { |
2151 | gr_gk20a_init_ctxsw_ucode(g); | 2122 | err = gr_gk20a_init_ctxsw_ucode(g); |
2123 | |||
2124 | if (err) | ||
2125 | return err; | ||
2126 | } | ||
2152 | gr_gk20a_load_falcon_with_bootloader(g); | 2127 | gr_gk20a_load_falcon_with_bootloader(g); |
2153 | g->gr.skip_ucode_init = true; | 2128 | g->gr.skip_ucode_init = true; |
2154 | } | 2129 | } |
@@ -2976,21 +2951,13 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) | |||
2976 | 2951 | ||
2977 | gr_gk20a_free_global_ctx_buffers(g); | 2952 | gr_gk20a_free_global_ctx_buffers(g); |
2978 | 2953 | ||
2979 | dma_free_coherent(d, gr->mmu_wr_mem.size, | 2954 | gk20a_gmmu_free(g, &gr->mmu_wr_mem); |
2980 | gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova); | 2955 | gk20a_gmmu_free(g, &gr->mmu_rd_mem); |
2981 | gr->mmu_wr_mem.cpuva = NULL; | ||
2982 | gr->mmu_wr_mem.iova = 0; | ||
2983 | dma_free_coherent(d, gr->mmu_rd_mem.size, | ||
2984 | gr->mmu_rd_mem.cpuva, gr->mmu_rd_mem.iova); | ||
2985 | gr->mmu_rd_mem.cpuva = NULL; | ||
2986 | gr->mmu_rd_mem.iova = 0; | ||
2987 | 2956 | ||
2988 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | 2957 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); |
2989 | dma_free_attrs(d, gr->compbit_store.size, gr->compbit_store.pages, | 2958 | dma_free_attrs(d, gr->compbit_store.size, gr->compbit_store.pages, |
2990 | gr->compbit_store.base_iova, &attrs); | 2959 | gr->compbit_store.base_iova, &attrs); |
2991 | 2960 | ||
2992 | memset(&gr->mmu_wr_mem, 0, sizeof(struct mmu_desc)); | ||
2993 | memset(&gr->mmu_rd_mem, 0, sizeof(struct mmu_desc)); | ||
2994 | memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc)); | 2961 | memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc)); |
2995 | 2962 | ||
2996 | kfree(gr->gpc_tpc_count); | 2963 | kfree(gr->gpc_tpc_count); |
@@ -3234,33 +3201,19 @@ clean_up: | |||
3234 | 3201 | ||
3235 | static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr) | 3202 | static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr) |
3236 | { | 3203 | { |
3237 | struct device *d = dev_from_gk20a(g); | 3204 | int err; |
3238 | dma_addr_t iova; | ||
3239 | |||
3240 | gr->mmu_wr_mem_size = gr->mmu_rd_mem_size = 0x1000; | ||
3241 | 3205 | ||
3242 | gr->mmu_wr_mem.size = gr->mmu_wr_mem_size; | 3206 | err = gk20a_gmmu_alloc(g, 0x1000, &gr->mmu_wr_mem); |
3243 | gr->mmu_wr_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_wr_mem_size, | 3207 | if (err) |
3244 | &iova, GFP_KERNEL); | ||
3245 | if (!gr->mmu_wr_mem.cpuva) | ||
3246 | goto err; | 3208 | goto err; |
3247 | 3209 | ||
3248 | gr->mmu_wr_mem.iova = iova; | 3210 | err = gk20a_gmmu_alloc(g, 0x1000, &gr->mmu_rd_mem); |
3249 | 3211 | if (err) | |
3250 | gr->mmu_rd_mem.size = gr->mmu_rd_mem_size; | ||
3251 | gr->mmu_rd_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_rd_mem_size, | ||
3252 | &iova, GFP_KERNEL); | ||
3253 | if (!gr->mmu_rd_mem.cpuva) | ||
3254 | goto err_free_wr_mem; | 3212 | goto err_free_wr_mem; |
3255 | |||
3256 | gr->mmu_rd_mem.iova = iova; | ||
3257 | return 0; | 3213 | return 0; |
3258 | 3214 | ||
3259 | err_free_wr_mem: | 3215 | err_free_wr_mem: |
3260 | dma_free_coherent(d, gr->mmu_wr_mem.size, | 3216 | gk20a_gmmu_free(g, &gr->mmu_wr_mem); |
3261 | gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova); | ||
3262 | gr->mmu_wr_mem.cpuva = NULL; | ||
3263 | gr->mmu_wr_mem.iova = 0; | ||
3264 | err: | 3217 | err: |
3265 | return -ENOMEM; | 3218 | return -ENOMEM; |
3266 | } | 3219 | } |
@@ -4241,7 +4194,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4241 | gk20a_dbg_fn(""); | 4194 | gk20a_dbg_fn(""); |
4242 | 4195 | ||
4243 | /* init mmu debug buffer */ | 4196 | /* init mmu debug buffer */ |
4244 | addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_wr_mem.iova); | 4197 | addr = gk20a_mm_iova_addr(g, gr->mmu_wr_mem.sgt->sgl); |
4245 | addr >>= fb_mmu_debug_wr_addr_alignment_v(); | 4198 | addr >>= fb_mmu_debug_wr_addr_alignment_v(); |
4246 | 4199 | ||
4247 | gk20a_writel(g, fb_mmu_debug_wr_r(), | 4200 | gk20a_writel(g, fb_mmu_debug_wr_r(), |
@@ -4249,7 +4202,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4249 | fb_mmu_debug_wr_vol_false_f() | | 4202 | fb_mmu_debug_wr_vol_false_f() | |
4250 | fb_mmu_debug_wr_addr_f(addr)); | 4203 | fb_mmu_debug_wr_addr_f(addr)); |
4251 | 4204 | ||
4252 | addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_rd_mem.iova); | 4205 | addr = gk20a_mm_iova_addr(g, gr->mmu_rd_mem.sgt->sgl); |
4253 | addr >>= fb_mmu_debug_rd_addr_alignment_v(); | 4206 | addr >>= fb_mmu_debug_rd_addr_alignment_v(); |
4254 | 4207 | ||
4255 | gk20a_writel(g, fb_mmu_debug_rd_r(), | 4208 | gk20a_writel(g, fb_mmu_debug_rd_r(), |
@@ -4651,8 +4604,6 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g) | |||
4651 | int err = 0; | 4604 | int err = 0; |
4652 | 4605 | ||
4653 | u32 size; | 4606 | u32 size; |
4654 | struct sg_table *sgt_pg_buf; | ||
4655 | dma_addr_t iova; | ||
4656 | 4607 | ||
4657 | gk20a_dbg_fn(""); | 4608 | gk20a_dbg_fn(""); |
4658 | 4609 | ||
@@ -4665,50 +4616,24 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g) | |||
4665 | return err; | 4616 | return err; |
4666 | } | 4617 | } |
4667 | 4618 | ||
4668 | if (!pmu->pg_buf.cpuva) { | 4619 | if (!pmu->pg_buf.cpu_va) { |
4669 | pmu->pg_buf.cpuva = dma_alloc_coherent(d, size, | 4620 | err = gk20a_gmmu_alloc_map(vm, size, &pmu->pg_buf); |
4670 | &iova, | 4621 | if (err) { |
4671 | GFP_KERNEL); | ||
4672 | if (!pmu->pg_buf.cpuva) { | ||
4673 | gk20a_err(d, "failed to allocate memory\n"); | 4622 | gk20a_err(d, "failed to allocate memory\n"); |
4674 | return -ENOMEM; | 4623 | return -ENOMEM; |
4675 | } | 4624 | } |
4676 | |||
4677 | pmu->pg_buf.iova = iova; | ||
4678 | pmu->pg_buf.size = size; | ||
4679 | |||
4680 | err = gk20a_get_sgtable(d, &sgt_pg_buf, | ||
4681 | pmu->pg_buf.cpuva, | ||
4682 | pmu->pg_buf.iova, | ||
4683 | size); | ||
4684 | if (err) { | ||
4685 | gk20a_err(d, "failed to create sg table\n"); | ||
4686 | goto err_free_pg_buf; | ||
4687 | } | ||
4688 | |||
4689 | pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm, | ||
4690 | &sgt_pg_buf, | ||
4691 | size, | ||
4692 | 0, /* flags */ | ||
4693 | gk20a_mem_flag_none); | ||
4694 | if (!pmu->pg_buf.pmu_va) { | ||
4695 | gk20a_err(d, "failed to map fecs pg buffer"); | ||
4696 | err = -ENOMEM; | ||
4697 | goto err_free_sgtable; | ||
4698 | } | ||
4699 | |||
4700 | gk20a_free_sgtable(&sgt_pg_buf); | ||
4701 | } | 4625 | } |
4702 | 4626 | ||
4703 | 4627 | ||
4704 | err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa); | 4628 | err = gr_gk20a_fecs_set_reglist_bind_inst(g, |
4629 | gk20a_mem_phys(&mm->pmu.inst_block)); | ||
4705 | if (err) { | 4630 | if (err) { |
4706 | gk20a_err(dev_from_gk20a(g), | 4631 | gk20a_err(dev_from_gk20a(g), |
4707 | "fail to bind pmu inst to gr"); | 4632 | "fail to bind pmu inst to gr"); |
4708 | return err; | 4633 | return err; |
4709 | } | 4634 | } |
4710 | 4635 | ||
4711 | err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.pmu_va); | 4636 | err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.gpu_va); |
4712 | if (err) { | 4637 | if (err) { |
4713 | gk20a_err(dev_from_gk20a(g), | 4638 | gk20a_err(dev_from_gk20a(g), |
4714 | "fail to set pg buffer pmu va"); | 4639 | "fail to set pg buffer pmu va"); |
@@ -4716,15 +4641,6 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g) | |||
4716 | } | 4641 | } |
4717 | 4642 | ||
4718 | return err; | 4643 | return err; |
4719 | |||
4720 | err_free_sgtable: | ||
4721 | gk20a_free_sgtable(&sgt_pg_buf); | ||
4722 | err_free_pg_buf: | ||
4723 | dma_free_coherent(d, size, | ||
4724 | pmu->pg_buf.cpuva, pmu->pg_buf.iova); | ||
4725 | pmu->pg_buf.cpuva = NULL; | ||
4726 | pmu->pg_buf.iova = 0; | ||
4727 | return err; | ||
4728 | } | 4644 | } |
4729 | 4645 | ||
4730 | int gk20a_init_gr_support(struct gk20a *g) | 4646 | int gk20a_init_gr_support(struct gk20a *g) |
@@ -4983,14 +4899,14 @@ int gk20a_gr_reset(struct gk20a *g) | |||
4983 | } | 4899 | } |
4984 | 4900 | ||
4985 | err = gr_gk20a_fecs_set_reglist_bind_inst(g, | 4901 | err = gr_gk20a_fecs_set_reglist_bind_inst(g, |
4986 | g->mm.pmu.inst_block.cpu_pa); | 4902 | gk20a_mem_phys(&g->mm.pmu.inst_block)); |
4987 | if (err) { | 4903 | if (err) { |
4988 | gk20a_err(dev_from_gk20a(g), | 4904 | gk20a_err(dev_from_gk20a(g), |
4989 | "fail to bind pmu inst to gr"); | 4905 | "fail to bind pmu inst to gr"); |
4990 | return err; | 4906 | return err; |
4991 | } | 4907 | } |
4992 | 4908 | ||
4993 | err = gr_gk20a_fecs_set_reglist_virtual_addr(g, g->pmu.pg_buf.pmu_va); | 4909 | err = gr_gk20a_fecs_set_reglist_virtual_addr(g, g->pmu.pg_buf.gpu_va); |
4994 | if (err) { | 4910 | if (err) { |
4995 | gk20a_err(dev_from_gk20a(g), | 4911 | gk20a_err(dev_from_gk20a(g), |
4996 | "fail to set pg buffer pmu va"); | 4912 | "fail to set pg buffer pmu va"); |
@@ -5357,7 +5273,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx, | |||
5357 | /* slow path */ | 5273 | /* slow path */ |
5358 | for (chid = 0; chid < f->num_channels; chid++) | 5274 | for (chid = 0; chid < f->num_channels; chid++) |
5359 | if (f->channel[chid].in_use) { | 5275 | if (f->channel[chid].in_use) { |
5360 | if ((u32)(f->channel[chid].inst_block.cpu_pa >> | 5276 | if ((u32)(gk20a_mem_phys(&f->channel[chid].inst_block) >> |
5361 | ram_in_base_shift_v()) == | 5277 | ram_in_base_shift_v()) == |
5362 | gr_fecs_current_ctx_ptr_v(curr_ctx)) { | 5278 | gr_fecs_current_ctx_ptr_v(curr_ctx)) { |
5363 | tsgid = f->channel[chid].tsgid; | 5279 | tsgid = f->channel[chid].tsgid; |
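Several hunks above replace the old cached `inst_block.cpu_pa` field with `gk20a_mem_phys(&...->inst_block)`. The helper itself is defined in the mm_gk20a.h part of this commit, which this page does not show; judging by the call sites here and by the spots that still open-code `sg_phys(inst_block.sgt->sgl)` (in channel_gk20a.c and fifo_gk20a.c above), it is presumably a thin wrapper along these lines:

```c
/* Assumed definition -- the authoritative one is in the mm_gk20a.h
 * hunk, not shown on this page. It derives the physical address on
 * demand from the sg table, replacing the old inst_desc.cpu_pa cache. */
static inline phys_addr_t gk20a_mem_phys(struct mem_desc *mem)
{
	return sg_phys(mem->sgt->sgl);
}
```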
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 5dfaac5f..81615e0f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -256,10 +256,8 @@ struct gr_gk20a { | |||
256 | 256 | ||
257 | struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF]; | 257 | struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF]; |
258 | 258 | ||
259 | struct mmu_desc mmu_wr_mem; | 259 | struct mem_desc mmu_wr_mem; |
260 | u32 mmu_wr_mem_size; | 260 | struct mem_desc mmu_rd_mem; |
261 | struct mmu_desc mmu_rd_mem; | ||
262 | u32 mmu_rd_mem_size; | ||
263 | 261 | ||
264 | u8 *map_tiles; | 262 | u8 *map_tiles; |
265 | u32 map_tile_count; | 263 | u32 map_tile_count; |
@@ -336,9 +334,8 @@ struct gk20a_ctxsw_ucode_segments { | |||
336 | 334 | ||
337 | struct gk20a_ctxsw_ucode_info { | 335 | struct gk20a_ctxsw_ucode_info { |
338 | u64 *p_va; | 336 | u64 *p_va; |
339 | struct inst_desc inst_blk_desc; | 337 | struct mem_desc inst_blk_desc; |
340 | struct surface_mem_desc surface_desc; | 338 | struct mem_desc surface_desc; |
341 | u64 ucode_gpuva; | ||
342 | struct gk20a_ctxsw_ucode_segments fecs; | 339 | struct gk20a_ctxsw_ucode_segments fecs; |
343 | struct gk20a_ctxsw_ucode_segments gpccs; | 340 | struct gk20a_ctxsw_ucode_segments gpccs; |
344 | }; | 341 | }; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index c3895a53..954249c6 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -268,7 +268,7 @@ static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) | |||
268 | return 0; | 268 | return 0; |
269 | } | 269 | } |
270 | 270 | ||
271 | static void gk20a_remove_vm(struct vm_gk20a *vm, struct inst_desc *inst_block) | 271 | static void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block) |
272 | { | 272 | { |
273 | struct gk20a *g = vm->mm->g; | 273 | struct gk20a *g = vm->mm->g; |
274 | 274 | ||
@@ -335,8 +335,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
335 | int gk20a_init_mm_setup_hw(struct gk20a *g) | 335 | int gk20a_init_mm_setup_hw(struct gk20a *g) |
336 | { | 336 | { |
337 | struct mm_gk20a *mm = &g->mm; | 337 | struct mm_gk20a *mm = &g->mm; |
338 | struct inst_desc *inst_block = &mm->bar1.inst_block; | 338 | struct mem_desc *inst_block = &mm->bar1.inst_block; |
339 | phys_addr_t inst_pa = inst_block->cpu_pa; | 339 | phys_addr_t inst_pa = gk20a_mem_phys(inst_block); |
340 | int err; | 340 | int err; |
341 | 341 | ||
342 | gk20a_dbg_fn(""); | 342 | gk20a_dbg_fn(""); |
@@ -1516,54 +1516,95 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, | |||
1516 | return vaddr; | 1516 | return vaddr; |
1517 | } | 1517 | } |
1518 | 1518 | ||
1519 | int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, | 1519 | int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct mem_desc *mem) |
1520 | size_t size, struct mem_desc *mem) | 1520 | { |
1521 | return gk20a_gmmu_alloc_attr(g, 0, size, mem); | ||
1522 | } | ||
1523 | |||
1524 | int gk20a_gmmu_alloc_attr(struct gk20a *g, enum dma_attr attr, size_t size, struct mem_desc *mem) | ||
1521 | { | 1525 | { |
1522 | struct gk20a *g = vm->mm->g; | ||
1523 | struct device *d = dev_from_gk20a(g); | 1526 | struct device *d = dev_from_gk20a(g); |
1524 | int err; | 1527 | int err; |
1525 | struct sg_table *sgt; | 1528 | dma_addr_t iova; |
1529 | |||
1530 | gk20a_dbg_fn(""); | ||
1531 | |||
1532 | if (attr) { | ||
1533 | DEFINE_DMA_ATTRS(attrs); | ||
1534 | dma_set_attr(attr, &attrs); | ||
1535 | mem->cpu_va = | ||
1536 | dma_alloc_attrs(d, size, &iova, GFP_KERNEL, &attrs); | ||
1537 | } else { | ||
1538 | mem->cpu_va = dma_alloc_coherent(d, size, &iova, GFP_KERNEL); | ||
1539 | } | ||
1526 | 1540 | ||
1527 | mem->cpu_va = dma_alloc_coherent(d, size, &mem->iova, GFP_KERNEL); | ||
1528 | if (!mem->cpu_va) | 1541 | if (!mem->cpu_va) |
1529 | return -ENOMEM; | 1542 | return -ENOMEM; |
1530 | 1543 | ||
1531 | err = gk20a_get_sgtable(d, &sgt, mem->cpu_va, mem->iova, size); | 1544 | err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size); |
1532 | if (err) | 1545 | if (err) |
1533 | goto fail_free; | 1546 | goto fail_free; |
1534 | 1547 | ||
1535 | mem->gpu_va = gk20a_gmmu_map(vm, &sgt, size, 0, gk20a_mem_flag_none); | 1548 | mem->size = size; |
1536 | gk20a_free_sgtable(&sgt); | 1549 | memset(mem->cpu_va, 0, size); |
1550 | |||
1551 | gk20a_dbg_fn("done"); | ||
1552 | |||
1553 | return 0; | ||
1554 | |||
1555 | fail_free: | ||
1556 | dma_free_coherent(d, size, mem->cpu_va, iova); | ||
1557 | mem->cpu_va = NULL; | ||
1558 | mem->sgt = NULL; | ||
1559 | return err; | ||
1560 | } | ||
1561 | |||
1562 | void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) | ||
1563 | { | ||
1564 | struct device *d = dev_from_gk20a(g); | ||
1565 | |||
1566 | if (mem->cpu_va) | ||
1567 | dma_free_coherent(d, mem->size, mem->cpu_va, | ||
1568 | sg_dma_address(mem->sgt->sgl)); | ||
1569 | mem->cpu_va = NULL; | ||
1570 | |||
1571 | if (mem->sgt) | ||
1572 | gk20a_free_sgtable(&mem->sgt); | ||
1573 | } | ||
1574 | |||
1575 | int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem) | ||
1576 | { | ||
1577 | return gk20a_gmmu_alloc_map_attr(vm, 0, size, mem); | ||
1578 | } | ||
1579 | |||
1580 | int gk20a_gmmu_alloc_map_attr(struct vm_gk20a *vm, | ||
1581 | enum dma_attr attr, size_t size, struct mem_desc *mem) | ||
1582 | { | ||
1583 | int err = gk20a_gmmu_alloc_attr(vm->mm->g, attr, size, mem); | ||
1584 | |||
1585 | if (err) | ||
1586 | return err; | ||
1587 | |||
1588 | mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, gk20a_mem_flag_none); | ||
1537 | if (!mem->gpu_va) { | 1589 | if (!mem->gpu_va) { |
1538 | err = -ENOMEM; | 1590 | err = -ENOMEM; |
1539 | goto fail_free; | 1591 | goto fail_free; |
1540 | } | 1592 | } |
1541 | 1593 | ||
1542 | mem->size = size; | ||
1543 | |||
1544 | return 0; | 1594 | return 0; |
1545 | 1595 | ||
1546 | fail_free: | 1596 | fail_free: |
1547 | dma_free_coherent(d, size, mem->cpu_va, mem->iova); | 1597 | gk20a_gmmu_free(vm->mm->g, mem); |
1548 | mem->cpu_va = NULL; | ||
1549 | mem->iova = 0; | ||
1550 | |||
1551 | return err; | 1598 | return err; |
1552 | } | 1599 | } |
1553 | 1600 | ||
1554 | void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct mem_desc *mem) | 1601 | void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct mem_desc *mem) |
1555 | { | 1602 | { |
1556 | struct gk20a *g = vm->mm->g; | ||
1557 | struct device *d = dev_from_gk20a(g); | ||
1558 | |||
1559 | if (mem->gpu_va) | 1603 | if (mem->gpu_va) |
1560 | gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size, gk20a_mem_flag_none); | 1604 | gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size, gk20a_mem_flag_none); |
1561 | mem->gpu_va = 0; | 1605 | mem->gpu_va = 0; |
1562 | 1606 | ||
1563 | if (mem->cpu_va) | 1607 | gk20a_gmmu_free(vm->mm->g, mem); |
1564 | dma_free_coherent(d, mem->size, mem->cpu_va, mem->iova); | ||
1565 | mem->cpu_va = NULL; | ||
1566 | mem->iova = 0; | ||
1567 | } | 1608 | } |
1568 | 1609 | ||
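For reference, a minimal sketch of the allocate/use/teardown lifecycle the new helpers enable (illustrative only, not part of the patch; example_buf_init/example_buf_fini and the SZ_4K size are hypothetical, the helper signatures are the ones declared in mm_gk20a.h below):

/* Minimal sketch, assuming the nvgpu in-tree headers; not part of this patch. */
static int example_buf_init(struct vm_gk20a *vm, struct mem_desc *mem)
{
	/* One call allocates coherent DMA memory, builds the sg_table
	 * and maps it into the GPU VA space. */
	int err = gk20a_gmmu_alloc_map(vm, SZ_4K, mem);

	if (err)
		return err;

	/* CPU side writes through mem->cpu_va; the GPU sees mem->gpu_va.
	 * The alloc path above already zeroes the buffer. */
	((u32 *)mem->cpu_va)[0] = 0xdeadbeef;

	return 0;
}

static void example_buf_fini(struct vm_gk20a *vm, struct mem_desc *mem)
{
	/* Single call replaces the old unmap + free_sgtable +
	 * dma_free_coherent sequence. */
	gk20a_gmmu_unmap_free(vm, mem);
}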
1569 | dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) | 1610 | dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) |
@@ -2644,42 +2685,24 @@ void gk20a_deinit_vm(struct vm_gk20a *vm) | |||
2644 | kfree(vm->pdb.entries); | 2685 | kfree(vm->pdb.entries); |
2645 | } | 2686 | } |
2646 | 2687 | ||
2647 | int gk20a_alloc_inst_block(struct gk20a *g, struct inst_desc *inst_block) | 2688 | int gk20a_alloc_inst_block(struct gk20a *g, struct mem_desc *inst_block) |
2648 | { | 2689 | { |
2649 | struct device *dev = dev_from_gk20a(g); | 2690 | struct device *dev = dev_from_gk20a(g); |
2650 | dma_addr_t iova; | 2691 | int err; |
2651 | 2692 | ||
2652 | inst_block->size = ram_in_alloc_size_v(); | 2693 | err = gk20a_gmmu_alloc(g, ram_in_alloc_size_v(), inst_block); |
2653 | inst_block->cpuva = dma_alloc_coherent(dev, inst_block->size, | 2694 | if (err) { |
2654 | &iova, GFP_KERNEL); | ||
2655 | if (!inst_block->cpuva) { | ||
2656 | gk20a_err(dev, "%s: memory allocation failed\n", __func__); | 2695 | gk20a_err(dev, "%s: memory allocation failed\n", __func__); |
2657 | return -ENOMEM; | 2696 | return err; |
2658 | } | ||
2659 | |||
2660 | inst_block->iova = iova; | ||
2661 | inst_block->cpu_pa = gk20a_get_phys_from_iova(dev, inst_block->iova); | ||
2662 | if (!inst_block->cpu_pa) { | ||
2663 | gk20a_err(dev, "%s: failed to get phys address\n", __func__); | ||
2664 | gk20a_free_inst_block(g, inst_block); | ||
2665 | return -ENOMEM; | ||
2666 | } | 2697 | } |
2667 | 2698 | ||
2668 | memset(inst_block->cpuva, 0, inst_block->size); | ||
2669 | |||
2670 | return 0; | 2699 | return 0; |
2671 | } | 2700 | } |
2672 | 2701 | ||
2673 | void gk20a_free_inst_block(struct gk20a *g, struct inst_desc *inst_block) | 2702 | void gk20a_free_inst_block(struct gk20a *g, struct mem_desc *inst_block) |
2674 | { | 2703 | { |
2675 | struct device *dev = dev_from_gk20a(g); | 2704 | if (inst_block->cpu_va) |
2676 | 2705 | gk20a_gmmu_free(g, inst_block); | |
2677 | if (inst_block->cpuva) { | ||
2678 | dma_free_coherent(dev, inst_block->size, | ||
2679 | inst_block->cpuva, inst_block->iova); | ||
2680 | } | ||
2681 | |||
2682 | memset(inst_block, 0, sizeof(*inst_block)); | ||
2683 | } | 2706 | } |
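A hedged sketch of how the three inst-block helpers now compose over struct mem_desc (illustrative only; example_setup_inst_block is hypothetical):

/* Minimal sketch; not part of this patch. */
static int example_setup_inst_block(struct gk20a *g, struct vm_gk20a *vm,
				    struct mem_desc *inst_block,
				    u32 big_page_size)
{
	int err = gk20a_alloc_inst_block(g, inst_block);

	if (err)
		return err;

	/* Writes the VM's page directory base into the block. */
	gk20a_init_inst_block(inst_block, vm, big_page_size);
	return 0;
}

Teardown is the matching gk20a_free_inst_block(g, inst_block), which per the hunk above is now safe to call on a block that was never allocated (cpu_va stays NULL).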
2684 | 2707 | ||
2685 | static int gk20a_init_bar1_vm(struct mm_gk20a *mm) | 2708 | static int gk20a_init_bar1_vm(struct mm_gk20a *mm) |
@@ -2687,7 +2710,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) | |||
2687 | int err; | 2710 | int err; |
2688 | struct vm_gk20a *vm = &mm->bar1.vm; | 2711 | struct vm_gk20a *vm = &mm->bar1.vm; |
2689 | struct gk20a *g = gk20a_from_mm(mm); | 2712 | struct gk20a *g = gk20a_from_mm(mm); |
2690 | struct inst_desc *inst_block = &mm->bar1.inst_block; | 2713 | struct mem_desc *inst_block = &mm->bar1.inst_block; |
2691 | u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; | 2714 | u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; |
2692 | 2715 | ||
2693 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; | 2716 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; |
@@ -2713,7 +2736,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) | |||
2713 | int err; | 2736 | int err; |
2714 | struct vm_gk20a *vm = &mm->pmu.vm; | 2737 | struct vm_gk20a *vm = &mm->pmu.vm; |
2715 | struct gk20a *g = gk20a_from_mm(mm); | 2738 | struct gk20a *g = gk20a_from_mm(mm); |
2716 | struct inst_desc *inst_block = &mm->pmu.inst_block; | 2739 | struct mem_desc *inst_block = &mm->pmu.inst_block; |
2717 | u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; | 2740 | u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; |
2718 | 2741 | ||
2719 | mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; | 2742 | mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; |
@@ -2739,7 +2762,7 @@ static int gk20a_init_hwpm(struct mm_gk20a *mm) | |||
2739 | int err; | 2762 | int err; |
2740 | struct vm_gk20a *vm = &mm->pmu.vm; | 2763 | struct vm_gk20a *vm = &mm->pmu.vm; |
2741 | struct gk20a *g = gk20a_from_mm(mm); | 2764 | struct gk20a *g = gk20a_from_mm(mm); |
2742 | struct inst_desc *inst_block = &mm->hwpm.inst_block; | 2765 | struct mem_desc *inst_block = &mm->hwpm.inst_block; |
2743 | 2766 | ||
2744 | err = gk20a_alloc_inst_block(g, inst_block); | 2767 | err = gk20a_alloc_inst_block(g, inst_block); |
2745 | if (err) | 2768 | if (err) |
@@ -2763,13 +2786,13 @@ void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) | |||
2763 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); | 2786 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); |
2764 | } | 2787 | } |
2765 | 2788 | ||
2766 | void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, | 2789 | void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm, |
2767 | u32 big_page_size) | 2790 | u32 big_page_size) |
2768 | { | 2791 | { |
2769 | struct gk20a *g = gk20a_from_vm(vm); | 2792 | struct gk20a *g = gk20a_from_vm(vm); |
2770 | u64 pde_addr = gk20a_mm_iova_addr(g, vm->pdb.sgt->sgl); | 2793 | u64 pde_addr = gk20a_mm_iova_addr(g, vm->pdb.sgt->sgl); |
2771 | phys_addr_t inst_pa = inst_block->cpu_pa; | 2794 | phys_addr_t inst_pa = gk20a_mem_phys(inst_block); |
2772 | void *inst_ptr = inst_block->cpuva; | 2795 | void *inst_ptr = inst_block->cpu_va; |
2773 | 2796 | ||
2774 | gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", | 2797 | gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", |
2775 | (u64)inst_pa, inst_ptr); | 2798 | (u64)inst_pa, inst_ptr); |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 54028e73..ca7fef01 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -46,7 +46,7 @@ | |||
46 | 46 | ||
47 | struct mem_desc { | 47 | struct mem_desc { |
48 | void *cpu_va; | 48 | void *cpu_va; |
49 | dma_addr_t iova; | 49 | struct sg_table *sgt; |
50 | size_t size; | 50 | size_t size; |
51 | u64 gpu_va; | 51 | u64 gpu_va; |
52 | }; | 52 | }; |
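With the cached iova field replaced by the sg_table pointer, both address spaces are derived from the scatterlist on demand. A minimal sketch (illustrative only; example_addresses is hypothetical):

/* Minimal sketch; not part of this patch. */
static void example_addresses(struct mem_desc *mem)
{
	/* gk20a_mem_phys() below is sg_phys(mem->sgt->sgl). */
	phys_addr_t pa = gk20a_mem_phys(mem);
	/* What gk20a_gmmu_free() passes back to dma_free_coherent(). */
	dma_addr_t iova = sg_dma_address(mem->sgt->sgl);

	(void)pa;
	(void)iova;
}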
@@ -70,40 +70,6 @@ struct gpfifo_desc { | |||
70 | u64 gpu_va; | 70 | u64 gpu_va; |
71 | }; | 71 | }; |
72 | 72 | ||
73 | struct mmu_desc { | ||
74 | void *cpuva; | ||
75 | u64 iova; | ||
76 | size_t size; | ||
77 | }; | ||
78 | |||
79 | struct inst_desc { | ||
80 | u64 iova; | ||
81 | void *cpuva; | ||
82 | phys_addr_t cpu_pa; | ||
83 | size_t size; | ||
84 | }; | ||
85 | |||
86 | struct surface_mem_desc { | ||
87 | u64 iova; | ||
88 | void *cpuva; | ||
89 | struct sg_table *sgt; | ||
90 | size_t size; | ||
91 | }; | ||
92 | |||
93 | struct userd_desc { | ||
94 | struct sg_table *sgt; | ||
95 | u64 iova; | ||
96 | void *cpuva; | ||
97 | size_t size; | ||
98 | u64 gpu_va; | ||
99 | }; | ||
100 | |||
101 | struct runlist_mem_desc { | ||
102 | u64 iova; | ||
103 | void *cpuva; | ||
104 | size_t size; | ||
105 | }; | ||
106 | |||
107 | struct patch_desc { | 73 | struct patch_desc { |
108 | struct page **pages; | 74 | struct page **pages; |
109 | u64 iova; | 75 | u64 iova; |
@@ -113,13 +79,6 @@ struct patch_desc { | |||
113 | u32 data_count; | 79 | u32 data_count; |
114 | }; | 80 | }; |
115 | 81 | ||
116 | struct pmu_mem_desc { | ||
117 | void *cpuva; | ||
118 | u64 iova; | ||
119 | u64 pmu_va; | ||
120 | size_t size; | ||
121 | }; | ||
122 | |||
123 | struct priv_cmd_queue_mem_desc { | 82 | struct priv_cmd_queue_mem_desc { |
124 | u64 base_iova; | 83 | u64 base_iova; |
125 | u32 *base_cpuva; | 84 | u32 *base_cpuva; |
@@ -336,24 +295,24 @@ struct mm_gk20a { | |||
336 | struct { | 295 | struct { |
337 | u32 aperture_size; | 296 | u32 aperture_size; |
338 | struct vm_gk20a vm; | 297 | struct vm_gk20a vm; |
339 | struct inst_desc inst_block; | 298 | struct mem_desc inst_block; |
340 | } bar1; | 299 | } bar1; |
341 | 300 | ||
342 | struct { | 301 | struct { |
343 | u32 aperture_size; | 302 | u32 aperture_size; |
344 | struct vm_gk20a vm; | 303 | struct vm_gk20a vm; |
345 | struct inst_desc inst_block; | 304 | struct mem_desc inst_block; |
346 | } bar2; | 305 | } bar2; |
347 | 306 | ||
348 | struct { | 307 | struct { |
349 | u32 aperture_size; | 308 | u32 aperture_size; |
350 | struct vm_gk20a vm; | 309 | struct vm_gk20a vm; |
351 | struct inst_desc inst_block; | 310 | struct mem_desc inst_block; |
352 | } pmu; | 311 | } pmu; |
353 | 312 | ||
354 | struct { | 313 | struct { |
355 | /* using pmu vm currently */ | 314 | /* using pmu vm currently */ |
356 | struct inst_desc inst_block; | 315 | struct mem_desc inst_block; |
357 | } hwpm; | 316 | } hwpm; |
358 | 317 | ||
359 | 318 | ||
@@ -406,9 +365,9 @@ static inline int max_vaddr_bits_gk20a(void) | |||
406 | #define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v() | 365 | #define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v() |
407 | #endif | 366 | #endif |
408 | 367 | ||
409 | int gk20a_alloc_inst_block(struct gk20a *g, struct inst_desc *inst_block); | 368 | int gk20a_alloc_inst_block(struct gk20a *g, struct mem_desc *inst_block); |
410 | void gk20a_free_inst_block(struct gk20a *g, struct inst_desc *inst_block); | 369 | void gk20a_free_inst_block(struct gk20a *g, struct mem_desc *inst_block); |
411 | void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, | 370 | void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm, |
412 | u32 big_page_size); | 371 | u32 big_page_size); |
413 | 372 | ||
414 | void gk20a_mm_dump_vm(struct vm_gk20a *vm, | 373 | void gk20a_mm_dump_vm(struct vm_gk20a *vm, |
@@ -448,9 +407,31 @@ int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, | |||
448 | size_t size, | 407 | size_t size, |
449 | struct mem_desc *mem); | 408 | struct mem_desc *mem); |
450 | 409 | ||
410 | int gk20a_gmmu_alloc_map_attr(struct vm_gk20a *vm, | ||
411 | enum dma_attr attr, | ||
412 | size_t size, | ||
413 | struct mem_desc *mem); | ||
414 | |||
451 | void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, | 415 | void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, |
452 | struct mem_desc *mem); | 416 | struct mem_desc *mem); |
453 | 417 | ||
418 | int gk20a_gmmu_alloc(struct gk20a *g, | ||
419 | size_t size, | ||
420 | struct mem_desc *mem); | ||
421 | |||
422 | int gk20a_gmmu_alloc_attr(struct gk20a *g, | ||
423 | enum dma_attr attr, | ||
424 | size_t size, | ||
425 | struct mem_desc *mem); | ||
426 | |||
427 | void gk20a_gmmu_free(struct gk20a *g, | ||
428 | struct mem_desc *mem); | ||
429 | |||
430 | static inline phys_addr_t gk20a_mem_phys(struct mem_desc *mem) | ||
431 | { | ||
432 | return sg_phys(mem->sgt->sgl); | ||
433 | } | ||
434 | |||
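The _attr variants thread a DMA attribute through to dma_alloc_attrs(); passing 0 falls back to dma_alloc_coherent(), which is what the plain gk20a_gmmu_alloc()/gk20a_gmmu_alloc_map() wrappers do. A hedged sketch of the read-only-ucode case used by the PMU and ACR paths below (example_alloc_ro_ucode is hypothetical):

/* Minimal sketch; not part of this patch. */
static int example_alloc_ro_ucode(struct vm_gk20a *vm, size_t size,
				  struct mem_desc *ucode)
{
	/* DMA_ATTR_READ_ONLY is forwarded to dma_alloc_attrs(); the GMMU
	 * mapping itself is still created with gk20a_mem_flag_none (see
	 * gk20a_gmmu_alloc_map_attr() in mm_gk20a.c). */
	return gk20a_gmmu_alloc_map_attr(vm, DMA_ATTR_READ_ONLY, size, ucode);
}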
454 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | 435 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, |
455 | u64 map_offset, | 436 | u64 map_offset, |
456 | struct sg_table *sgt, | 437 | struct sg_table *sgt, |
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index f2430165..95bb1eb6 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |||
@@ -146,7 +146,7 @@ static void set_pmu_cmdline_args_falctracesize_v2( | |||
146 | 146 | ||
147 | static void set_pmu_cmdline_args_falctracedmabase_v2(struct pmu_gk20a *pmu) | 147 | static void set_pmu_cmdline_args_falctracedmabase_v2(struct pmu_gk20a *pmu) |
148 | { | 148 | { |
149 | pmu->args_v2.falc_trace_dma_base = ((u32)pmu->trace_buf.pmu_va)/0x100; | 149 | pmu->args_v2.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100; |
150 | } | 150 | } |
151 | 151 | ||
152 | static void set_pmu_cmdline_args_falctracedmaidx_v2( | 152 | static void set_pmu_cmdline_args_falctracedmaidx_v2( |
@@ -177,7 +177,7 @@ static void set_pmu_cmdline_args_falctracesize_v3( | |||
177 | 177 | ||
178 | static void set_pmu_cmdline_args_falctracedmabase_v3(struct pmu_gk20a *pmu) | 178 | static void set_pmu_cmdline_args_falctracedmabase_v3(struct pmu_gk20a *pmu) |
179 | { | 179 | { |
180 | pmu->args_v3.falc_trace_dma_base = ((u32)pmu->trace_buf.pmu_va)/0x100; | 180 | pmu->args_v3.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100; |
181 | } | 181 | } |
182 | 182 | ||
183 | static void set_pmu_cmdline_args_falctracedmaidx_v3( | 183 | static void set_pmu_cmdline_args_falctracedmaidx_v3( |
@@ -218,9 +218,9 @@ static bool find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos) | |||
218 | static void printtrace(struct pmu_gk20a *pmu) | 218 | static void printtrace(struct pmu_gk20a *pmu) |
219 | { | 219 | { |
220 | u32 i = 0, j = 0, k, l, m, count; | 220 | u32 i = 0, j = 0, k, l, m, count; |
221 | char *trace = pmu->trace_buf.cpuva; | 221 | char *trace = pmu->trace_buf.cpu_va; |
222 | char part_str[40], buf[0x40]; | 222 | char part_str[40], buf[0x40]; |
223 | u32 *trace1 = pmu->trace_buf.cpuva; | 223 | u32 *trace1 = pmu->trace_buf.cpu_va; |
224 | struct gk20a *g = gk20a_from_pmu(pmu); | 224 | struct gk20a *g = gk20a_from_pmu(pmu); |
225 | gk20a_err(dev_from_gk20a(g), "Dump pmutrace"); | 225 | gk20a_err(dev_from_gk20a(g), "Dump pmutrace"); |
226 | for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { | 226 | for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { |
@@ -249,7 +249,7 @@ static void printtrace(struct pmu_gk20a *pmu) | |||
249 | 249 | ||
250 | static void set_pmu_cmdline_args_falctracedmabase_v1(struct pmu_gk20a *pmu) | 250 | static void set_pmu_cmdline_args_falctracedmabase_v1(struct pmu_gk20a *pmu) |
251 | { | 251 | { |
252 | pmu->args_v1.falc_trace_dma_base = ((u32)pmu->trace_buf.pmu_va)/0x100; | 252 | pmu->args_v1.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100; |
253 | } | 253 | } |
254 | 254 | ||
255 | static void set_pmu_cmdline_args_falctracedmaidx_v1( | 255 | static void set_pmu_cmdline_args_falctracedmaidx_v1( |
@@ -1349,7 +1349,7 @@ static int pmu_bootstrap(struct pmu_gk20a *pmu) | |||
1349 | pwr_falcon_itfen_ctxen_enable_f()); | 1349 | pwr_falcon_itfen_ctxen_enable_f()); |
1350 | gk20a_writel(g, pwr_pmu_new_instblk_r(), | 1350 | gk20a_writel(g, pwr_pmu_new_instblk_r(), |
1351 | pwr_pmu_new_instblk_ptr_f( | 1351 | pwr_pmu_new_instblk_ptr_f( |
1352 | mm->pmu.inst_block.cpu_pa >> 12) | | 1352 | sg_phys(mm->pmu.inst_block.sgt->sgl) >> 12) | |
1353 | pwr_pmu_new_instblk_valid_f(1) | | 1353 | pwr_pmu_new_instblk_valid_f(1) | |
1354 | pwr_pmu_new_instblk_target_sys_coh_f()); | 1354 | pwr_pmu_new_instblk_target_sys_coh_f()); |
1355 | 1355 | ||
@@ -1377,13 +1377,13 @@ static int pmu_bootstrap(struct pmu_gk20a *pmu) | |||
1377 | pwr_falcon_dmemc_blk_f(0) | | 1377 | pwr_falcon_dmemc_blk_f(0) | |
1378 | pwr_falcon_dmemc_aincw_f(1)); | 1378 | pwr_falcon_dmemc_aincw_f(1)); |
1379 | 1379 | ||
1380 | addr_code = u64_lo32((pmu->ucode.pmu_va + | 1380 | addr_code = u64_lo32((pmu->ucode.gpu_va + |
1381 | desc->app_start_offset + | 1381 | desc->app_start_offset + |
1382 | desc->app_resident_code_offset) >> 8) ; | 1382 | desc->app_resident_code_offset) >> 8) ; |
1383 | addr_data = u64_lo32((pmu->ucode.pmu_va + | 1383 | addr_data = u64_lo32((pmu->ucode.gpu_va + |
1384 | desc->app_start_offset + | 1384 | desc->app_start_offset + |
1385 | desc->app_resident_data_offset) >> 8); | 1385 | desc->app_resident_data_offset) >> 8); |
1386 | addr_load = u64_lo32((pmu->ucode.pmu_va + | 1386 | addr_load = u64_lo32((pmu->ucode.gpu_va + |
1387 | desc->bootloader_start_offset) >> 8); | 1387 | desc->bootloader_start_offset) >> 8); |
1388 | 1388 | ||
1389 | gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE); | 1389 | gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE); |
@@ -1942,13 +1942,10 @@ static int gk20a_prepare_ucode(struct gk20a *g) | |||
1942 | { | 1942 | { |
1943 | struct pmu_gk20a *pmu = &g->pmu; | 1943 | struct pmu_gk20a *pmu = &g->pmu; |
1944 | int i, err = 0; | 1944 | int i, err = 0; |
1945 | struct sg_table *sgt_pmu_ucode; | ||
1946 | dma_addr_t iova; | ||
1947 | struct device *d = dev_from_gk20a(g); | 1945 | struct device *d = dev_from_gk20a(g); |
1948 | struct mm_gk20a *mm = &g->mm; | 1946 | struct mm_gk20a *mm = &g->mm; |
1949 | struct vm_gk20a *vm = &mm->pmu.vm; | 1947 | struct vm_gk20a *vm = &mm->pmu.vm; |
1950 | void *ucode_ptr; | 1948 | void *ucode_ptr; |
1951 | DEFINE_DMA_ATTRS(attrs); | ||
1952 | 1949 | ||
1953 | if (g->pmu_fw) { | 1950 | if (g->pmu_fw) { |
1954 | gk20a_init_pmu(pmu); | 1951 | gk20a_init_pmu(pmu); |
@@ -1967,56 +1964,21 @@ static int gk20a_prepare_ucode(struct gk20a *g) | |||
1967 | pmu->ucode_image = (u32 *)((u8 *)pmu->desc + | 1964 | pmu->ucode_image = (u32 *)((u8 *)pmu->desc + |
1968 | pmu->desc->descriptor_size); | 1965 | pmu->desc->descriptor_size); |
1969 | 1966 | ||
1970 | dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); | 1967 | err = gk20a_gmmu_alloc_map_attr(vm, DMA_ATTR_READ_ONLY, |
1971 | pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, | 1968 | GK20A_PMU_UCODE_SIZE_MAX, &pmu->ucode); |
1972 | &iova, | 1969 | if (err) |
1973 | GFP_KERNEL, | ||
1974 | &attrs); | ||
1975 | if (!pmu->ucode.cpuva) { | ||
1976 | gk20a_err(d, "failed to allocate memory\n"); | ||
1977 | err = -ENOMEM; | ||
1978 | goto err_release_fw; | 1970 | goto err_release_fw; |
1979 | } | ||
1980 | |||
1981 | pmu->ucode.iova = iova; | ||
1982 | |||
1983 | err = gk20a_get_sgtable(d, &sgt_pmu_ucode, | ||
1984 | pmu->ucode.cpuva, | ||
1985 | pmu->ucode.iova, | ||
1986 | GK20A_PMU_UCODE_SIZE_MAX); | ||
1987 | if (err) { | ||
1988 | gk20a_err(d, "failed to allocate sg table\n"); | ||
1989 | goto err_free_pmu_ucode; | ||
1990 | } | ||
1991 | 1971 | ||
1992 | pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, | 1972 | ucode_ptr = pmu->ucode.cpu_va; |
1993 | GK20A_PMU_UCODE_SIZE_MAX, | ||
1994 | 0, /* flags */ | ||
1995 | gk20a_mem_flag_read_only); | ||
1996 | if (!pmu->ucode.pmu_va) { | ||
1997 | gk20a_err(d, "failed to map pmu ucode memory!!"); | ||
1998 | goto err_free_ucode_sgt; | ||
1999 | } | ||
2000 | |||
2001 | ucode_ptr = pmu->ucode.cpuva; | ||
2002 | 1973 | ||
2003 | for (i = 0; i < (pmu->desc->app_start_offset + | 1974 | for (i = 0; i < (pmu->desc->app_start_offset + |
2004 | pmu->desc->app_size) >> 2; i++) | 1975 | pmu->desc->app_size) >> 2; i++) |
2005 | gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]); | 1976 | gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]); |
2006 | 1977 | ||
2007 | gk20a_free_sgtable(&sgt_pmu_ucode); | ||
2008 | |||
2009 | gk20a_init_pmu(pmu); | 1978 | gk20a_init_pmu(pmu); |
2010 | 1979 | ||
2011 | return 0; | 1980 | return 0; |
2012 | 1981 | ||
2013 | err_free_ucode_sgt: | ||
2014 | gk20a_free_sgtable(&sgt_pmu_ucode); | ||
2015 | err_free_pmu_ucode: | ||
2016 | dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, | ||
2017 | pmu->ucode.cpuva, pmu->ucode.iova, &attrs); | ||
2018 | pmu->ucode.cpuva = NULL; | ||
2019 | pmu->ucode.iova = 0; | ||
2020 | err_release_fw: | 1982 | err_release_fw: |
2021 | release_firmware(g->pmu_fw); | 1983 | release_firmware(g->pmu_fw); |
2022 | 1984 | ||
@@ -2031,9 +1993,6 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g) | |||
2031 | struct device *d = dev_from_gk20a(g); | 1993 | struct device *d = dev_from_gk20a(g); |
2032 | int i, err = 0; | 1994 | int i, err = 0; |
2033 | u8 *ptr; | 1995 | u8 *ptr; |
2034 | struct sg_table *sgt_seq_buf; | ||
2035 | struct sg_table *sgt_pmu_buf; | ||
2036 | dma_addr_t iova; | ||
2037 | 1996 | ||
2038 | gk20a_dbg_fn(""); | 1997 | gk20a_dbg_fn(""); |
2039 | 1998 | ||
@@ -2082,70 +2041,19 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g) | |||
2082 | 2041 | ||
2083 | INIT_WORK(&pmu->pg_init, pmu_setup_hw); | 2042 | INIT_WORK(&pmu->pg_init, pmu_setup_hw); |
2084 | 2043 | ||
2085 | pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, | 2044 | err = gk20a_gmmu_alloc_map(vm, GK20A_PMU_SEQ_BUF_SIZE, &pmu->seq_buf); |
2086 | &iova, | 2045 | if (err) { |
2087 | GFP_KERNEL); | ||
2088 | if (!pmu->seq_buf.cpuva) { | ||
2089 | gk20a_err(d, "failed to allocate memory\n"); | 2046 | gk20a_err(d, "failed to allocate memory\n"); |
2090 | err = -ENOMEM; | ||
2091 | goto err_free_seq; | 2047 | goto err_free_seq; |
2092 | } | 2048 | } |
2093 | 2049 | ||
2094 | pmu->seq_buf.iova = iova; | 2050 | err = gk20a_gmmu_alloc_map(vm, GK20A_PMU_TRACE_BUFSIZE, &pmu->trace_buf); |
2095 | 2051 | if (err) { | |
2096 | pmu->trace_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_TRACE_BUFSIZE, | ||
2097 | &iova, | ||
2098 | GFP_KERNEL); | ||
2099 | if (!pmu->trace_buf.cpuva) { | ||
2100 | gk20a_err(d, "failed to allocate trace memory\n"); | 2052 | gk20a_err(d, "failed to allocate trace memory\n"); |
2101 | err = -ENOMEM; | ||
2102 | goto err_free_seq_buf; | 2053 | goto err_free_seq_buf; |
2103 | } | 2054 | } |
2104 | pmu->trace_buf.iova = iova; | ||
2105 | 2055 | ||
2106 | err = gk20a_get_sgtable(d, &sgt_seq_buf, | 2056 | ptr = (u8 *)pmu->seq_buf.cpu_va; |
2107 | pmu->seq_buf.cpuva, | ||
2108 | pmu->seq_buf.iova, | ||
2109 | GK20A_PMU_SEQ_BUF_SIZE); | ||
2110 | if (err) { | ||
2111 | gk20a_err(d, "failed to allocate seq buf sg table\n"); | ||
2112 | goto err_free_trace_buf; | ||
2113 | } | ||
2114 | |||
2115 | pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf, | ||
2116 | GK20A_PMU_SEQ_BUF_SIZE, | ||
2117 | 0, /* flags */ | ||
2118 | gk20a_mem_flag_none); | ||
2119 | if (!pmu->seq_buf.pmu_va) { | ||
2120 | gk20a_err(d, "failed to gmmu map seq buf memory!!"); | ||
2121 | err = -ENOMEM; | ||
2122 | goto err_free_seq_buf_sgt; | ||
2123 | } | ||
2124 | |||
2125 | err = gk20a_get_sgtable(d, &sgt_pmu_buf, | ||
2126 | pmu->trace_buf.cpuva, | ||
2127 | pmu->trace_buf.iova, | ||
2128 | GK20A_PMU_TRACE_BUFSIZE); | ||
2129 | if (err) { | ||
2130 | gk20a_err(d, "failed to allocate sg table for Trace\n"); | ||
2131 | goto err_unmap_seq_buf; | ||
2132 | } | ||
2133 | |||
2134 | pmu->trace_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_buf, | ||
2135 | GK20A_PMU_TRACE_BUFSIZE, | ||
2136 | 0, /* flags */ | ||
2137 | gk20a_mem_flag_none); | ||
2138 | if (!pmu->trace_buf.pmu_va) { | ||
2139 | gk20a_err(d, "failed to gmmu map pmu trace memory!!"); | ||
2140 | err = -ENOMEM; | ||
2141 | goto err_free_trace_buf_sgt; | ||
2142 | } | ||
2143 | |||
2144 | ptr = (u8 *)pmu->seq_buf.cpuva; | ||
2145 | if (!ptr) { | ||
2146 | gk20a_err(d, "failed to map cpu ptr for zbc buffer"); | ||
2147 | goto err_unmap_trace_buf; | ||
2148 | } | ||
2149 | 2057 | ||
2150 | /* TBD: remove this if ZBC save/restore is handled by PMU | 2058 | /* TBD: remove this if ZBC save/restore is handled by PMU |
2151 | * end an empty ZBC sequence for now */ | 2059 | * end an empty ZBC sequence for now */ |
@@ -2155,35 +2063,13 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g) | |||
2155 | 2063 | ||
2156 | pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; | 2064 | pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; |
2157 | 2065 | ||
2158 | gk20a_free_sgtable(&sgt_seq_buf); | ||
2159 | gk20a_free_sgtable(&sgt_pmu_buf); | ||
2160 | |||
2161 | pmu->sw_ready = true; | 2066 | pmu->sw_ready = true; |
2162 | 2067 | ||
2163 | skip_init: | 2068 | skip_init: |
2164 | gk20a_dbg_fn("done"); | 2069 | gk20a_dbg_fn("done"); |
2165 | return 0; | 2070 | return 0; |
2166 | err_unmap_trace_buf: | ||
2167 | gk20a_gmmu_unmap(vm, pmu->trace_buf.pmu_va, | ||
2168 | GK20A_PMU_TRACE_BUFSIZE, gk20a_mem_flag_none); | ||
2169 | err_free_trace_buf_sgt: | ||
2170 | gk20a_free_sgtable(&sgt_pmu_buf); | ||
2171 | err_unmap_seq_buf: | ||
2172 | gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va, | ||
2173 | GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none); | ||
2174 | err_free_seq_buf_sgt: | ||
2175 | gk20a_free_sgtable(&sgt_seq_buf); | ||
2176 | err_free_trace_buf: | ||
2177 | dma_free_coherent(d, GK20A_PMU_TRACE_BUFSIZE, | ||
2178 | pmu->trace_buf.cpuva, pmu->trace_buf.iova); | ||
2179 | pmu->trace_buf.cpuva = NULL; | ||
2180 | pmu->trace_buf.iova = 0; | ||
2181 | |||
2182 | err_free_seq_buf: | 2071 | err_free_seq_buf: |
2183 | dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, | 2072 | gk20a_gmmu_unmap_free(vm, &pmu->seq_buf); |
2184 | pmu->seq_buf.cpuva, pmu->seq_buf.iova); | ||
2185 | pmu->seq_buf.cpuva = NULL; | ||
2186 | pmu->seq_buf.iova = 0; | ||
2187 | err_free_seq: | 2073 | err_free_seq: |
2188 | kfree(pmu->seq); | 2074 | kfree(pmu->seq); |
2189 | err_free_mutex: | 2075 | err_free_mutex: |
@@ -2306,8 +2192,8 @@ int gk20a_init_pmu_bind_fecs(struct gk20a *g) | |||
2306 | cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A; | 2192 | cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A; |
2307 | cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS; | 2193 | cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS; |
2308 | cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size; | 2194 | cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size; |
2309 | cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8); | 2195 | cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.gpu_va >> 8); |
2310 | cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF); | 2196 | cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.gpu_va & 0xFF); |
2311 | cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT; | 2197 | cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT; |
2312 | 2198 | ||
2313 | pmu->buf_loaded = false; | 2199 | pmu->buf_loaded = false; |
@@ -2331,8 +2217,8 @@ static void pmu_setup_hw_load_zbc(struct gk20a *g) | |||
2331 | cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A; | 2217 | cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A; |
2332 | cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC; | 2218 | cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC; |
2333 | cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size; | 2219 | cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size; |
2334 | cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8); | 2220 | cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.gpu_va >> 8); |
2335 | cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF); | 2221 | cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.gpu_va & 0xFF); |
2336 | cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT; | 2222 | cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT; |
2337 | 2223 | ||
2338 | pmu->buf_loaded = false; | 2224 | pmu->buf_loaded = false; |
@@ -4100,9 +3986,9 @@ static int falc_trace_show(struct seq_file *s, void *data) | |||
4100 | struct gk20a *g = s->private; | 3986 | struct gk20a *g = s->private; |
4101 | struct pmu_gk20a *pmu = &g->pmu; | 3987 | struct pmu_gk20a *pmu = &g->pmu; |
4102 | u32 i = 0, j = 0, k, l, m; | 3988 | u32 i = 0, j = 0, k, l, m; |
4103 | char *trace = pmu->trace_buf.cpuva; | 3989 | char *trace = pmu->trace_buf.cpu_va; |
4104 | char part_str[40]; | 3990 | char part_str[40]; |
4105 | u32 *trace1 = pmu->trace_buf.cpuva; | 3991 | u32 *trace1 = pmu->trace_buf.cpu_va; |
4106 | for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { | 3992 | for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { |
4107 | for (j = 0; j < 0x40; j++) | 3993 | for (j = 0; j < 0x40; j++) |
4108 | if (trace1[(i / 4) + j]) | 3994 | if (trace1[(i / 4) + j]) |
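Throughout this file the pmu_va field becomes gpu_va, but the falcon DMA encoding is unchanged: bases are programmed in 256-byte units and the low byte of the VA becomes the offset. A minimal sketch of the convention (illustrative only; example_fill_dma_fields is hypothetical):

/* Minimal sketch; not part of this patch. */
static void example_fill_dma_fields(struct pmu_gk20a *pmu,
				    u32 *dma_base, u8 *dma_offset)
{
	/* Same encoding as gk20a_init_pmu_bind_fecs() above. */
	*dma_base = u64_lo32(pmu->pg_buf.gpu_va >> 8);
	*dma_offset = (u8)(pmu->pg_buf.gpu_va & 0xFF);
}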
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index e4865180..6cd173e8 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | |||
@@ -1124,12 +1124,12 @@ struct pmu_pg_stats { | |||
1124 | struct pmu_gk20a { | 1124 | struct pmu_gk20a { |
1125 | 1125 | ||
1126 | struct pmu_ucode_desc *desc; | 1126 | struct pmu_ucode_desc *desc; |
1127 | struct pmu_mem_desc ucode; | 1127 | struct mem_desc ucode; |
1128 | 1128 | ||
1129 | struct pmu_mem_desc pg_buf; | 1129 | struct mem_desc pg_buf; |
1130 | /* TBD: remove this if ZBC seq is fixed */ | 1130 | /* TBD: remove this if ZBC seq is fixed */ |
1131 | struct pmu_mem_desc seq_buf; | 1131 | struct mem_desc seq_buf; |
1132 | struct pmu_mem_desc trace_buf; | 1132 | struct mem_desc trace_buf; |
1133 | bool buf_loaded; | 1133 | bool buf_loaded; |
1134 | 1134 | ||
1135 | struct pmu_sha1_gid gid_info; | 1135 | struct pmu_sha1_gid gid_info; |
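For call sites migrating off the removed descriptor types, the field mapping this patch applies (collected from the hunks above) is:

/* pmu_mem_desc / inst_desc / userd_desc / runlist_mem_desc -> struct mem_desc
 *   .cpuva   -> .cpu_va
 *   .pmu_va  -> .gpu_va  (falcon-visible VA)
 *   .iova    -> sg_dma_address(mem->sgt->sgl)
 *   .cpu_pa  -> gk20a_mem_phys(mem), i.e. sg_phys(mem->sgt->sgl)
 */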
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index a58f726a..ecb0f8ab 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c | |||
@@ -194,7 +194,7 @@ int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img) | |||
194 | g->ctxsw_ucode_info.fecs.code.offset; | 194 | g->ctxsw_ucode_info.fecs.code.offset; |
195 | p_img->desc->app_resident_data_size = | 195 | p_img->desc->app_resident_data_size = |
196 | g->ctxsw_ucode_info.fecs.data.size; | 196 | g->ctxsw_ucode_info.fecs.data.size; |
197 | p_img->data = g->ctxsw_ucode_info.surface_desc.cpuva; | 197 | p_img->data = g->ctxsw_ucode_info.surface_desc.cpu_va; |
198 | p_img->data_size = p_img->desc->image_size; | 198 | p_img->data_size = p_img->desc->image_size; |
199 | 199 | ||
200 | p_img->fw_ver = NULL; | 200 | p_img->fw_ver = NULL; |
@@ -874,11 +874,8 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
874 | { | 874 | { |
875 | struct mm_gk20a *mm = &g->mm; | 875 | struct mm_gk20a *mm = &g->mm; |
876 | struct vm_gk20a *vm = &mm->pmu.vm; | 876 | struct vm_gk20a *vm = &mm->pmu.vm; |
877 | struct device *d = dev_from_gk20a(g); | ||
878 | int i, err = 0; | 877 | int i, err = 0; |
879 | struct sg_table *sgt_pmu_ucode = NULL; | 878 | u64 *acr_dmem; |
880 | dma_addr_t iova; | ||
881 | u64 *pacr_ucode_cpuva = NULL, pacr_ucode_pmu_va = 0, *acr_dmem; | ||
882 | u32 img_size_in_bytes = 0; | 879 | u32 img_size_in_bytes = 0; |
883 | u32 status, size; | 880 | u32 status, size; |
884 | u64 start; | 881 | u64 start; |
@@ -924,36 +921,18 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
924 | err = -1; | 921 | err = -1; |
925 | goto err_release_acr_fw; | 922 | goto err_release_acr_fw; |
926 | } | 923 | } |
927 | pacr_ucode_cpuva = dma_alloc_coherent(d, img_size_in_bytes, | 924 | err = gk20a_gmmu_alloc_map(vm, img_size_in_bytes, |
928 | &iova, GFP_KERNEL); | 925 | &acr->acr_ucode); |
929 | if (!pacr_ucode_cpuva) { | 926 | if (err) { |
930 | err = -ENOMEM; | 927 | err = -ENOMEM; |
931 | goto err_release_acr_fw; | 928 | goto err_release_acr_fw; |
932 | } | 929 | } |
933 | 930 | ||
934 | err = gk20a_get_sgtable(d, &sgt_pmu_ucode, | ||
935 | pacr_ucode_cpuva, | ||
936 | iova, | ||
937 | img_size_in_bytes); | ||
938 | if (err) { | ||
939 | gk20a_err(d, "failed to allocate sg table\n"); | ||
940 | err = -ENOMEM; | ||
941 | goto err_free_acr_buf; | ||
942 | } | ||
943 | pacr_ucode_pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, | ||
944 | img_size_in_bytes, | ||
945 | 0, /* flags */ | ||
946 | gk20a_mem_flag_read_only); | ||
947 | if (!pacr_ucode_pmu_va) { | ||
948 | gk20a_err(d, "failed to map pmu ucode memory!!"); | ||
949 | err = -ENOMEM; | ||
950 | goto err_free_ucode_sgt; | ||
951 | } | ||
952 | acr_dmem = (u64 *) | 931 | acr_dmem = (u64 *) |
953 | &(((u8 *)acr_ucode_data_t210_load)[ | 932 | &(((u8 *)acr_ucode_data_t210_load)[ |
954 | acr_ucode_header_t210_load[2]]); | 933 | acr_ucode_header_t210_load[2]]); |
955 | acr->acr_dmem_desc = (struct flcn_acr_desc *)((u8 *)( | 934 | acr->acr_dmem_desc = (struct flcn_acr_desc *)((u8 *)( |
956 | pacr_ucode_cpuva) + acr_ucode_header_t210_load[2]); | 935 | acr->acr_ucode.cpu_va) + acr_ucode_header_t210_load[2]); |
957 | ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start = | 936 | ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start = |
958 | start; | 937 | start; |
959 | ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size = | 938 | ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size = |
@@ -962,13 +941,9 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
962 | ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0; | 941 | ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0; |
963 | 942 | ||
964 | for (i = 0; i < (img_size_in_bytes/4); i++) { | 943 | for (i = 0; i < (img_size_in_bytes/4); i++) { |
965 | gk20a_mem_wr32(pacr_ucode_cpuva, i, | 944 | gk20a_mem_wr32(acr->acr_ucode.cpu_va, i, |
966 | acr_ucode_data_t210_load[i]); | 945 | acr_ucode_data_t210_load[i]); |
967 | } | 946 | } |
968 | acr->acr_ucode.cpuva = pacr_ucode_cpuva; | ||
969 | acr->acr_ucode.iova = iova; | ||
970 | acr->acr_ucode.pmu_va = pacr_ucode_pmu_va; | ||
971 | acr->acr_ucode.size = img_size_in_bytes; | ||
972 | /* | 947 | /* |
973 | * In order to execute this binary, we will be using | 948 | * In order to execute this binary, we will be using |
974 | * a bootloader which will load this image into PMU IMEM/DMEM. | 949 | * a bootloader which will load this image into PMU IMEM/DMEM. |
@@ -983,7 +958,7 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
983 | bl_dmem_desc->signature[3] = 0; | 958 | bl_dmem_desc->signature[3] = 0; |
984 | bl_dmem_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT; | 959 | bl_dmem_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT; |
985 | bl_dmem_desc->code_dma_base = | 960 | bl_dmem_desc->code_dma_base = |
986 | (unsigned int)(((u64)pacr_ucode_pmu_va >> 8)); | 961 | (unsigned int)(((u64)acr->acr_ucode.gpu_va >> 8)); |
987 | bl_dmem_desc->non_sec_code_off = acr_ucode_header_t210_load[0]; | 962 | bl_dmem_desc->non_sec_code_off = acr_ucode_header_t210_load[0]; |
988 | bl_dmem_desc->non_sec_code_size = acr_ucode_header_t210_load[1]; | 963 | bl_dmem_desc->non_sec_code_size = acr_ucode_header_t210_load[1]; |
989 | bl_dmem_desc->sec_code_off = acr_ucode_header_t210_load[5]; | 964 | bl_dmem_desc->sec_code_off = acr_ucode_header_t210_load[5]; |
@@ -993,8 +968,6 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
993 | bl_dmem_desc->code_dma_base + | 968 | bl_dmem_desc->code_dma_base + |
994 | ((acr_ucode_header_t210_load[2]) >> 8); | 969 | ((acr_ucode_header_t210_load[2]) >> 8); |
995 | bl_dmem_desc->data_size = acr_ucode_header_t210_load[3]; | 970 | bl_dmem_desc->data_size = acr_ucode_header_t210_load[3]; |
996 | gk20a_free_sgtable(&sgt_pmu_ucode); | ||
997 | sgt_pmu_ucode = NULL; | ||
998 | } else | 971 | } else |
999 | acr->acr_dmem_desc->nonwpr_ucode_blob_size = 0; | 972 | acr->acr_dmem_desc->nonwpr_ucode_blob_size = 0; |
1000 | status = pmu_exec_gen_bl(g, bl_dmem_desc, 1); | 973 | status = pmu_exec_gen_bl(g, bl_dmem_desc, 1); |
@@ -1004,17 +977,7 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
1004 | } | 977 | } |
1005 | return 0; | 978 | return 0; |
1006 | err_free_ucode_map: | 979 | err_free_ucode_map: |
1007 | gk20a_gmmu_unmap(vm, pacr_ucode_pmu_va, | 980 | gk20a_gmmu_unmap_free(vm, &acr->acr_ucode); |
1008 | img_size_in_bytes, gk20a_mem_flag_none); | ||
1009 | acr->acr_ucode.pmu_va = 0; | ||
1010 | err_free_ucode_sgt: | ||
1011 | if (sgt_pmu_ucode) | ||
1012 | gk20a_free_sgtable(&sgt_pmu_ucode); | ||
1013 | err_free_acr_buf: | ||
1014 | dma_free_coherent(d, img_size_in_bytes, | ||
1015 | pacr_ucode_cpuva, iova); | ||
1016 | acr->acr_ucode.cpuva = NULL; | ||
1017 | acr->acr_ucode.iova = 0; | ||
1018 | err_release_acr_fw: | 981 | err_release_acr_fw: |
1019 | release_firmware(acr_fw); | 982 | release_firmware(acr_fw); |
1020 | acr->acr_fw = NULL; | 983 | acr->acr_fw = NULL; |
@@ -1078,7 +1041,7 @@ static int bl_bootstrap(struct pmu_gk20a *pmu, | |||
1078 | pwr_falcon_itfen_ctxen_enable_f()); | 1041 | pwr_falcon_itfen_ctxen_enable_f()); |
1079 | gk20a_writel(g, pwr_pmu_new_instblk_r(), | 1042 | gk20a_writel(g, pwr_pmu_new_instblk_r(), |
1080 | pwr_pmu_new_instblk_ptr_f( | 1043 | pwr_pmu_new_instblk_ptr_f( |
1081 | mm->pmu.inst_block.cpu_pa >> 12) | | 1044 | sg_phys(mm->pmu.inst_block.sgt->sgl) >> 12) | |
1082 | pwr_pmu_new_instblk_valid_f(1) | | 1045 | pwr_pmu_new_instblk_valid_f(1) | |
1083 | pwr_pmu_new_instblk_target_sys_coh_f()); | 1046 | pwr_pmu_new_instblk_target_sys_coh_f()); |
1084 | 1047 | ||
@@ -1104,7 +1067,7 @@ static int bl_bootstrap(struct pmu_gk20a *pmu, | |||
1104 | pwr_falcon_imemc_aincw_f(1)); | 1067 | pwr_falcon_imemc_aincw_f(1)); |
1105 | virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8; | 1068 | virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8; |
1106 | tag = virt_addr >> 8; /* tag is always 256B aligned */ | 1069 | tag = virt_addr >> 8; /* tag is always 256B aligned */ |
1107 | bl_ucode = (u32 *)(acr->hsbl_ucode.cpuva); | 1070 | bl_ucode = (u32 *)(acr->hsbl_ucode.cpu_va); |
1108 | for (index = 0; index < bl_sz/4; index++) { | 1071 | for (index = 0; index < bl_sz/4; index++) { |
1109 | if ((index % 64) == 0) { | 1072 | if ((index % 64) == 0) { |
1110 | gk20a_writel(g, pwr_falcon_imemt_r(0), | 1073 | gk20a_writel(g, pwr_falcon_imemt_r(0), |
@@ -1198,16 +1161,11 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) | |||
1198 | struct vm_gk20a *vm = &mm->pmu.vm; | 1161 | struct vm_gk20a *vm = &mm->pmu.vm; |
1199 | struct device *d = dev_from_gk20a(g); | 1162 | struct device *d = dev_from_gk20a(g); |
1200 | int i, err = 0; | 1163 | int i, err = 0; |
1201 | struct sg_table *sgt_pmu_ucode = NULL; | ||
1202 | dma_addr_t iova; | ||
1203 | u32 bl_sz; | 1164 | u32 bl_sz; |
1204 | void *bl_cpuva; | ||
1205 | u64 bl_pmu_va; | ||
1206 | struct acr_gm20b *acr = &g->acr; | 1165 | struct acr_gm20b *acr = &g->acr; |
1207 | const struct firmware *hsbl_fw = acr->hsbl_fw; | 1166 | const struct firmware *hsbl_fw = acr->hsbl_fw; |
1208 | struct hsflcn_bl_desc *pmu_bl_gm10x_desc; | 1167 | struct hsflcn_bl_desc *pmu_bl_gm10x_desc; |
1209 | u32 *pmu_bl_gm10x = NULL; | 1168 | u32 *pmu_bl_gm10x = NULL; |
1210 | DEFINE_DMA_ATTRS(attrs); | ||
1211 | gm20b_dbg_pmu(""); | 1169 | gm20b_dbg_pmu(""); |
1212 | 1170 | ||
1213 | if (!hsbl_fw) { | 1171 | if (!hsbl_fw) { |
@@ -1232,44 +1190,25 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) | |||
1232 | /*TODO in code verify that enable PMU is done, | 1190 | /*TODO in code verify that enable PMU is done, |
1233 | scrubbing etc is done*/ | 1191 | scrubbing etc is done*/ |
1234 | /*TODO in code verify that gmmu vm init is done*/ | 1192 | /*TODO in code verify that gmmu vm init is done*/ |
1235 | dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); | 1193 | err = gk20a_gmmu_alloc_attr(g, |
1236 | bl_cpuva = dma_alloc_attrs(d, bl_sz, | 1194 | DMA_ATTR_READ_ONLY, bl_sz, &acr->hsbl_ucode); |
1237 | &iova, | 1195 | if (err) { |
1238 | GFP_KERNEL, | ||
1239 | &attrs); | ||
1240 | gm20b_dbg_pmu("bl size is %x\n", bl_sz); | ||
1241 | if (!bl_cpuva) { | ||
1242 | gk20a_err(d, "failed to allocate memory\n"); | 1196 | gk20a_err(d, "failed to allocate memory\n"); |
1243 | err = -ENOMEM; | ||
1244 | goto err_done; | 1197 | goto err_done; |
1245 | } | 1198 | } |
1246 | acr->hsbl_ucode.cpuva = bl_cpuva; | ||
1247 | acr->hsbl_ucode.iova = iova; | ||
1248 | |||
1249 | err = gk20a_get_sgtable(d, &sgt_pmu_ucode, | ||
1250 | bl_cpuva, | ||
1251 | iova, | ||
1252 | bl_sz); | ||
1253 | if (err) { | ||
1254 | gk20a_err(d, "failed to allocate sg table\n"); | ||
1255 | goto err_free_cpu_va; | ||
1256 | } | ||
1257 | 1199 | ||
1258 | bl_pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, | 1200 | acr->hsbl_ucode.gpu_va = gk20a_gmmu_map(vm, &acr->hsbl_ucode.sgt, |
1259 | bl_sz, | 1201 | bl_sz, |
1260 | 0, /* flags */ | 1202 | 0, /* flags */ |
1261 | gk20a_mem_flag_read_only); | 1203 | gk20a_mem_flag_read_only); |
1262 | if (!bl_pmu_va) { | 1204 | if (!acr->hsbl_ucode.gpu_va) { |
1263 | gk20a_err(d, "failed to map pmu ucode memory!!"); | 1205 | gk20a_err(d, "failed to map pmu ucode memory!!"); |
1264 | goto err_free_ucode_sgt; | 1206 | goto err_free_ucode; |
1265 | } | 1207 | } |
1266 | acr->hsbl_ucode.pmu_va = bl_pmu_va; | ||
1267 | 1208 | ||
1268 | for (i = 0; i < (bl_sz) >> 2; i++) | 1209 | for (i = 0; i < (bl_sz) >> 2; i++) |
1269 | gk20a_mem_wr32(bl_cpuva, i, pmu_bl_gm10x[i]); | 1210 | gk20a_mem_wr32(acr->hsbl_ucode.cpu_va, i, pmu_bl_gm10x[i]); |
1270 | gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n"); | 1211 | gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n"); |
1271 | gk20a_free_sgtable(&sgt_pmu_ucode); | ||
1272 | sgt_pmu_ucode = NULL; | ||
1273 | } | 1212 | } |
1274 | /* | 1213 | /* |
1275 | * Disable interrupts to avoid kernel hitting breakpoint due | 1214 | * Disable interrupts to avoid kernel hitting breakpoint due |
@@ -1306,14 +1245,10 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) | |||
1306 | start_gm20b_pmu(g); | 1245 | start_gm20b_pmu(g); |
1307 | return 0; | 1246 | return 0; |
1308 | err_unmap_bl: | 1247 | err_unmap_bl: |
1309 | gk20a_gmmu_unmap(vm, acr->hsbl_ucode.pmu_va, | 1248 | gk20a_gmmu_unmap(vm, acr->hsbl_ucode.gpu_va, |
1310 | acr->hsbl_ucode.size, gk20a_mem_flag_none); | 1249 | acr->hsbl_ucode.size, gk20a_mem_flag_none); |
1311 | err_free_ucode_sgt: | 1250 | err_free_ucode: |
1312 | if (sgt_pmu_ucode) | 1251 | gk20a_gmmu_free(g, &acr->hsbl_ucode); |
1313 | gk20a_free_sgtable(&sgt_pmu_ucode); | ||
1314 | err_free_cpu_va: | ||
1315 | dma_free_attrs(d, acr->hsbl_ucode.size, | ||
1316 | acr->hsbl_ucode.cpuva, acr->hsbl_ucode.iova, &attrs); | ||
1317 | err_done: | 1252 | err_done: |
1318 | release_firmware(hsbl_fw); | 1253 | release_firmware(hsbl_fw); |
1319 | return err; | 1254 | return err; |
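The collapsed error paths above rely on the teardown helpers being safe on partially initialized descriptors. A hedged sketch (example_acr_teardown is hypothetical):

/* Minimal sketch; not part of this patch. */
static void example_acr_teardown(struct vm_gk20a *vm, struct acr_gm20b *acr)
{
	/* gpu_va == 0 skips the GMMU unmap; cpu_va == NULL skips the
	 * DMA free, so these are safe whichever step setup failed at. */
	gk20a_gmmu_unmap_free(vm, &acr->acr_ucode);
	gk20a_gmmu_free(gk20a_from_vm(vm), &acr->hsbl_ucode);
}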
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h index e5d126f8..d26f91ff 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h | |||
@@ -381,9 +381,9 @@ struct acr_gm20b { | |||
381 | u32 pmu_args; | 381 | u32 pmu_args; |
382 | const struct firmware *acr_fw; | 382 | const struct firmware *acr_fw; |
383 | struct flcn_acr_desc *acr_dmem_desc; | 383 | struct flcn_acr_desc *acr_dmem_desc; |
384 | struct pmu_mem_desc acr_ucode; | 384 | struct mem_desc acr_ucode; |
385 | const struct firmware *hsbl_fw; | 385 | const struct firmware *hsbl_fw; |
386 | struct pmu_mem_desc hsbl_ucode; | 386 | struct mem_desc hsbl_ucode; |
387 | struct flcn_bl_dmem_desc bl_dmem_desc; | 387 | struct flcn_bl_dmem_desc bl_dmem_desc; |
388 | const struct firmware *pmu_fw; | 388 | const struct firmware *pmu_fw; |
389 | const struct firmware *pmu_desc; | 389 | const struct firmware *pmu_desc; |
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index 6d186c10..10d2a13e 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | |||
@@ -24,7 +24,7 @@ static void channel_gm20b_bind(struct channel_gk20a *ch_gk20a) | |||
24 | { | 24 | { |
25 | struct gk20a *g = ch_gk20a->g; | 25 | struct gk20a *g = ch_gk20a->g; |
26 | 26 | ||
27 | u32 inst_ptr = ch_gk20a->inst_block.cpu_pa | 27 | u32 inst_ptr = sg_phys(ch_gk20a->inst_block.sgt->sgl) |
28 | >> ram_in_base_shift_v(); | 28 | >> ram_in_base_shift_v(); |
29 | 29 | ||
30 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", | 30 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", |
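The open-coded sg_phys() here computes the same value as the new gk20a_mem_phys() helper; a hedged equivalent (example_inst_ptr is hypothetical):

/* Minimal sketch; not part of this patch. */
static u32 example_inst_ptr(struct channel_gk20a *ch)
{
	return (u32)(gk20a_mem_phys(&ch->inst_block)
		     >> ram_in_base_shift_v());
}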
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index c199964f..5ade9e6c 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -656,7 +656,7 @@ static u32 gr_gm20b_get_tpc_num(u32 addr) | |||
656 | static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g) | 656 | static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g) |
657 | { | 657 | { |
658 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; | 658 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; |
659 | u64 addr_base = ucode_info->ucode_gpuva; | 659 | u64 addr_base = ucode_info->surface_desc.gpu_va; |
660 | 660 | ||
661 | gr_gk20a_load_falcon_bind_instblk(g); | 661 | gr_gk20a_load_falcon_bind_instblk(g); |
662 | 662 | ||
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 45d956a2..25e6e4c7 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | |||
@@ -196,19 +196,11 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
196 | 196 | ||
197 | runlist_size = sizeof(u16) * f->num_channels; | 197 | runlist_size = sizeof(u16) * f->num_channels; |
198 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 198 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { |
199 | dma_addr_t iova; | 199 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); |
200 | 200 | if (err) { | |
201 | runlist->mem[i].cpuva = | ||
202 | dma_alloc_coherent(d, | ||
203 | runlist_size, | ||
204 | &iova, | ||
205 | GFP_KERNEL); | ||
206 | if (!runlist->mem[i].cpuva) { | ||
207 | dev_err(d, "memory allocation failed\n"); | 201 | dev_err(d, "memory allocation failed\n"); |
208 | goto clean_up_runlist; | 202 | goto clean_up_runlist; |
209 | } | 203 | } |
210 | runlist->mem[i].iova = iova; | ||
211 | runlist->mem[i].size = runlist_size; | ||
212 | } | 204 | } |
213 | mutex_init(&runlist->mutex); | 205 | mutex_init(&runlist->mutex); |
214 | 206 | ||
@@ -220,15 +212,8 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
220 | return 0; | 212 | return 0; |
221 | 213 | ||
222 | clean_up_runlist: | 214 | clean_up_runlist: |
223 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 215 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) |
224 | if (runlist->mem[i].cpuva) | 216 | gk20a_gmmu_free(g, &runlist->mem[i]); |
225 | dma_free_coherent(d, | ||
226 | runlist->mem[i].size, | ||
227 | runlist->mem[i].cpuva, | ||
228 | runlist->mem[i].iova); | ||
229 | runlist->mem[i].cpuva = NULL; | ||
230 | runlist->mem[i].iova = 0; | ||
231 | } | ||
232 | 217 | ||
233 | kfree(runlist->active_channels); | 218 | kfree(runlist->active_channels); |
234 | runlist->active_channels = NULL; | 219 | runlist->active_channels = NULL; |
@@ -248,7 +233,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
248 | struct fifo_gk20a *f = &g->fifo; | 233 | struct fifo_gk20a *f = &g->fifo; |
249 | struct device *d = dev_from_gk20a(g); | 234 | struct device *d = dev_from_gk20a(g); |
250 | int chid, err = 0; | 235 | int chid, err = 0; |
251 | dma_addr_t iova; | ||
252 | 236 | ||
253 | gk20a_dbg_fn(""); | 237 | gk20a_dbg_fn(""); |
254 | 238 | ||
@@ -268,28 +252,16 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
268 | f->max_engines = ENGINE_INVAL_GK20A; | 252 | f->max_engines = ENGINE_INVAL_GK20A; |
269 | 253 | ||
270 | f->userd_entry_size = 1 << ram_userd_base_shift_v(); | 254 | f->userd_entry_size = 1 << ram_userd_base_shift_v(); |
271 | f->userd_total_size = f->userd_entry_size * f->num_channels; | ||
272 | 255 | ||
273 | f->userd.cpuva = dma_alloc_coherent(d, | 256 | err = gk20a_gmmu_alloc(g, f->userd_entry_size * f->num_channels, |
274 | f->userd_total_size, | 257 | &f->userd); |
275 | &iova, | ||
276 | GFP_KERNEL); | ||
277 | if (!f->userd.cpuva) { | ||
278 | dev_err(d, "memory allocation failed\n"); | ||
279 | goto clean_up; | ||
280 | } | ||
281 | |||
282 | f->userd.iova = iova; | ||
283 | err = gk20a_get_sgtable(d, &f->userd.sgt, | ||
284 | f->userd.cpuva, f->userd.iova, | ||
285 | f->userd_total_size); | ||
286 | if (err) { | 258 | if (err) { |
287 | dev_err(d, "failed to create sg table\n"); | 259 | dev_err(d, "memory allocation failed\n"); |
288 | goto clean_up; | 260 | goto clean_up; |
289 | } | 261 | } |
290 | 262 | ||
291 | /* bar1 va */ | 263 | /* bar1 va */ |
292 | f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size); | 264 | f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd.size); |
293 | if (!f->userd.gpu_va) { | 265 | if (!f->userd.gpu_va) { |
294 | dev_err(d, "gmmu mapping failed\n"); | 266 | dev_err(d, "gmmu mapping failed\n"); |
295 | goto clean_up; | 267 | goto clean_up; |
@@ -297,8 +269,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
297 | 269 | ||
298 | gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); | 270 | gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); |
299 | 271 | ||
300 | f->userd.size = f->userd_total_size; | ||
301 | |||
302 | f->channel = kzalloc(f->num_channels * sizeof(*f->channel), | 272 | f->channel = kzalloc(f->num_channels * sizeof(*f->channel), |
303 | GFP_KERNEL); | 273 | GFP_KERNEL); |
304 | f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info), | 274 | f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info), |
@@ -315,9 +285,9 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
315 | 285 | ||
316 | for (chid = 0; chid < f->num_channels; chid++) { | 286 | for (chid = 0; chid < f->num_channels; chid++) { |
317 | f->channel[chid].userd_cpu_va = | 287 | f->channel[chid].userd_cpu_va = |
318 | f->userd.cpuva + chid * f->userd_entry_size; | 288 | f->userd.cpu_va + chid * f->userd_entry_size; |
319 | f->channel[chid].userd_iova = | 289 | f->channel[chid].userd_iova = |
320 | gk20a_mm_smmu_vaddr_translate(g, f->userd.iova) | 290 | gk20a_mm_iova_addr(g, f->userd.sgt->sgl) |
321 | + chid * f->userd_entry_size; | 291 | + chid * f->userd_entry_size; |
322 | f->channel[chid].userd_gpu_va = | 292 | f->channel[chid].userd_gpu_va = |
323 | f->userd.gpu_va + chid * f->userd_entry_size; | 293 | f->userd.gpu_va + chid * f->userd_entry_size; |
@@ -337,17 +307,9 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
337 | clean_up: | 307 | clean_up: |
338 | gk20a_dbg_fn("fail"); | 308 | gk20a_dbg_fn("fail"); |
339 | /* FIXME: unmap from bar1 */ | 309 | /* FIXME: unmap from bar1 */ |
340 | if (f->userd.sgt) | 310 | gk20a_gmmu_free(g, &f->userd); |
341 | gk20a_free_sgtable(&f->userd.sgt); | 311 | |
342 | if (f->userd.cpuva) | 312 | memset(&f->userd, 0, sizeof(f->userd)); |
343 | dma_free_coherent(d, | ||
344 | f->userd_total_size, | ||
345 | f->userd.cpuva, | ||
346 | f->userd.iova); | ||
347 | f->userd.cpuva = NULL; | ||
348 | f->userd.iova = 0; | ||
349 | |||
350 | memset(&f->userd, 0, sizeof(struct userd_desc)); | ||
351 | 313 | ||
352 | kfree(f->channel); | 314 | kfree(f->channel); |
353 | f->channel = NULL; | 315 | f->channel = NULL; |
@@ -368,7 +330,7 @@ static int vgpu_init_fifo_setup_hw(struct gk20a *g) | |||
368 | u32 v, v1 = 0x33, v2 = 0x55; | 330 | u32 v, v1 = 0x33, v2 = 0x55; |
369 | 331 | ||
370 | u32 bar1_vaddr = f->userd.gpu_va; | 332 | u32 bar1_vaddr = f->userd.gpu_va; |
371 | volatile u32 *cpu_vaddr = f->userd.cpuva; | 333 | volatile u32 *cpu_vaddr = f->userd.cpu_va; |
372 | 334 | ||
373 | gk20a_dbg_info("test bar1 @ vaddr 0x%x", | 335 | gk20a_dbg_info("test bar1 @ vaddr 0x%x", |
374 | bar1_vaddr); | 336 | bar1_vaddr); |
@@ -505,7 +467,7 @@ static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
505 | add /* resume to add all channels back */) { | 467 | add /* resume to add all channels back */) { |
506 | u32 chid; | 468 | u32 chid; |
507 | 469 | ||
508 | runlist_entry = runlist->mem[0].cpuva; | 470 | runlist_entry = runlist->mem[0].cpu_va; |
509 | for_each_set_bit(chid, | 471 | for_each_set_bit(chid, |
510 | runlist->active_channels, f->num_channels) { | 472 | runlist->active_channels, f->num_channels) { |
511 | gk20a_dbg_info("add channel %d to runlist", chid); | 473 | gk20a_dbg_info("add channel %d to runlist", chid); |
@@ -517,7 +479,7 @@ static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
517 | count = 0; | 479 | count = 0; |
518 | 480 | ||
519 | return vgpu_submit_runlist(platform->virt_handle, runlist_id, | 481 | return vgpu_submit_runlist(platform->virt_handle, runlist_id, |
520 | runlist->mem[0].cpuva, count); | 482 | runlist->mem[0].cpu_va, count); |
521 | } | 483 | } |
522 | 484 | ||
523 | /* add/remove a channel from runlist | 485 | /* add/remove a channel from runlist |
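Finally, a hedged sketch of the simplified vgpu runlist cleanup this hunk enables (illustrative only; the struct fifo_runlist_info_gk20a name is assumed from fifo_gk20a.h, and example_free_runlists is hypothetical):

/* Minimal sketch; not part of this patch. */
static void example_free_runlists(struct gk20a *g,
				  struct fifo_runlist_info_gk20a *runlist)
{
	int i;

	/* gk20a_gmmu_free() is a no-op on zeroed, never-allocated
	 * entries, so the loop needs no per-entry NULL checks. */
	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
		gk20a_gmmu_free(g, &runlist->mem[i]);
}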