author     Terje Bergstrom <tbergstrom@nvidia.com>    2015-02-26 17:37:43 -0500
committer  Dan Willemsen <dwillemsen@nvidia.com>      2015-04-04 21:59:26 -0400
commit     7290a6cbd5d03145d6f1ca4c3eacba40f6d4f93c (patch)
tree       de452c09f5eef76af273041dc64997fdc351dbd6 /drivers/gpu/nvgpu
parent     bb51cf9ec6482b50f3020179965ef82f58d91a0a (diff)
gpu: nvgpu: Implement common allocator and mem_desc
Introduce mem_desc, which holds all information needed for a buffer.
Implement helper functions for allocation and freeing that use this
data type.

Change-Id: I82c88595d058d4fb8c5c5fbf19d13269e48e422f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/712699
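For reference while reading the diff, this is the descriptor type the patch converges on, copied from the mm_gk20a.h hunk below. The per-buffer bookkeeping structs it replaces (mmu_desc, inst_desc, surface_mem_desc, userd_desc, runlist_mem_desc, pmu_mem_desc, gk20a_cde_mem_desc) are all removed in this change:

struct mem_desc {
	void *cpu_va;
	struct sg_table *sgt;
	size_t size;
	u64 gpu_va;
};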
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c     |  75
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.h     |  10
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c |  12
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h |   2
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c |   2
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/debug_gk20a.c   |   2
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c    | 125
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.h    |   5
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c      | 166
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h      |  11
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c      | 135
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h      |  79
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.c     | 164
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.h     |   8
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/acr_gm20b.c     | 107
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/acr_gm20b.h     |   4
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/fifo_gm20b.c    |   2
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c      |   2
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/fifo_vgpu.c      |  70
19 files changed, 279 insertions, 702 deletions
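Before the per-file hunks, a minimal usage sketch of the new allocator helpers (prototypes as added to mm_gk20a.h in this patch). The function name, the g/vm pointers and the 4096-byte size are illustrative placeholders, not code from the change:

static int mem_desc_example(struct gk20a *g, struct vm_gk20a *vm)
{
	struct mem_desc sysmem = {0};  /* CPU/DMA buffer, no GMMU mapping */
	struct mem_desc mapped = {0};  /* buffer allocated and GMMU-mapped */
	int err;

	/* dma_alloc_coherent + sg table, zeroed; fills cpu_va/sgt/size */
	err = gk20a_gmmu_alloc(g, 4096, &sysmem);
	if (err)
		return err;

	/* same allocation plus a gk20a_gmmu_map into vm; fills gpu_va too */
	err = gk20a_gmmu_alloc_map(vm, 4096, &mapped);
	if (err)
		goto free_sysmem;

	/* ... use sysmem.cpu_va and mapped.gpu_va ... */

	gk20a_gmmu_unmap_free(vm, &mapped);  /* unmap gpu_va, then free */
free_sysmem:
	gk20a_gmmu_free(g, &sysmem);
	return err;
}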
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index ea01914c..fb368fda 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -46,15 +46,11 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g);
46 46
47static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx) 47static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
48{ 48{
49 struct device *dev = &cde_ctx->pdev->dev;
50 int i; 49 int i;
51 50
52 for (i = 0; i < cde_ctx->num_bufs; i++) { 51 for (i = 0; i < cde_ctx->num_bufs; i++) {
53 struct gk20a_cde_mem_desc *mem = cde_ctx->mem + i; 52 struct mem_desc *mem = cde_ctx->mem + i;
54 gk20a_gmmu_unmap(cde_ctx->vm, mem->gpu_va, mem->num_bytes, 1); 53 gk20a_gmmu_unmap_free(cde_ctx->vm, mem);
55 gk20a_free_sgtable(&mem->sgt);
56 dma_free_writecombine(dev, mem->num_bytes, mem->cpuva,
57 mem->iova);
58 } 54 }
59 55
60 kfree(cde_ctx->init_convert_cmd); 56 kfree(cde_ctx->init_convert_cmd);
@@ -225,8 +221,7 @@ static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
225 const struct firmware *img, 221 const struct firmware *img,
226 struct gk20a_cde_hdr_buf *buf) 222 struct gk20a_cde_hdr_buf *buf)
227{ 223{
228 struct device *dev = &cde_ctx->pdev->dev; 224 struct mem_desc *mem;
229 struct gk20a_cde_mem_desc *mem;
230 int err; 225 int err;
231 226
232 /* check that the file can hold the buf */ 227 /* check that the file can hold the buf */
@@ -246,49 +241,21 @@ static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
246 241
247 /* allocate buf */ 242 /* allocate buf */
248 mem = cde_ctx->mem + cde_ctx->num_bufs; 243 mem = cde_ctx->mem + cde_ctx->num_bufs;
249 mem->num_bytes = buf->num_bytes; 244 err = gk20a_gmmu_alloc_map(cde_ctx->vm, buf->num_bytes, mem);
250 mem->cpuva = dma_alloc_writecombine(dev, mem->num_bytes, &mem->iova, 245 if (err) {
251 GFP_KERNEL);
252 if (!mem->cpuva) {
253 gk20a_warn(&cde_ctx->pdev->dev, "cde: could not allocate device memory. buffer idx = %d", 246 gk20a_warn(&cde_ctx->pdev->dev, "cde: could not allocate device memory. buffer idx = %d",
254 cde_ctx->num_bufs); 247 cde_ctx->num_bufs);
255 return -ENOMEM; 248 return -ENOMEM;
256 } 249 }
257 250
258 err = gk20a_get_sgtable(dev, &mem->sgt, mem->cpuva, mem->iova,
259 mem->num_bytes);
260 if (err) {
261 gk20a_warn(&cde_ctx->pdev->dev, "cde: could not get sg table. buffer idx = %d",
262 cde_ctx->num_bufs);
263 err = -ENOMEM;
264 goto err_get_sgtable;
265 }
266
267 mem->gpu_va = gk20a_gmmu_map(cde_ctx->vm, &mem->sgt, mem->num_bytes,
268 0,
269 gk20a_mem_flag_none);
270 if (!mem->gpu_va) {
271 gk20a_warn(&cde_ctx->pdev->dev, "cde: could not map buffer to gpuva. buffer idx = %d",
272 cde_ctx->num_bufs);
273 err = -ENOMEM;
274 goto err_map_buffer;
275 }
276
277 /* copy the content */ 251 /* copy the content */
278 if (buf->data_byte_offset != 0) 252 if (buf->data_byte_offset != 0)
279 memcpy(mem->cpuva, img->data + buf->data_byte_offset, 253 memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
280 buf->num_bytes); 254 buf->num_bytes);
281 255
282 cde_ctx->num_bufs++; 256 cde_ctx->num_bufs++;
283 257
284 return 0; 258 return 0;
285
286err_map_buffer:
287 gk20a_free_sgtable(&mem->sgt);
288 kfree(mem->sgt);
289err_get_sgtable:
290 dma_free_writecombine(dev, mem->num_bytes, &mem->cpuva, mem->iova);
291 return err;
292} 259}
293 260
294static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target, 261static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
@@ -340,8 +307,8 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
340 const struct firmware *img, 307 const struct firmware *img,
341 struct gk20a_cde_hdr_replace *replace) 308 struct gk20a_cde_hdr_replace *replace)
342{ 309{
343 struct gk20a_cde_mem_desc *source_mem; 310 struct mem_desc *source_mem;
344 struct gk20a_cde_mem_desc *target_mem; 311 struct mem_desc *target_mem;
345 u32 *target_mem_ptr; 312 u32 *target_mem_ptr;
346 u64 vaddr; 313 u64 vaddr;
347 int err; 314 int err;
@@ -356,15 +323,15 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
356 323
357 source_mem = cde_ctx->mem + replace->source_buf; 324 source_mem = cde_ctx->mem + replace->source_buf;
358 target_mem = cde_ctx->mem + replace->target_buf; 325 target_mem = cde_ctx->mem + replace->target_buf;
359 target_mem_ptr = target_mem->cpuva; 326 target_mem_ptr = target_mem->cpu_va;
360 327
361 if (source_mem->num_bytes < (replace->source_byte_offset + 3) || 328 if (source_mem->size < (replace->source_byte_offset + 3) ||
362 target_mem->num_bytes < (replace->target_byte_offset + 3)) { 329 target_mem->size < (replace->target_byte_offset + 3)) {
363 gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu", 330 gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
364 replace->target_byte_offset, 331 replace->target_byte_offset,
365 replace->source_byte_offset, 332 replace->source_byte_offset,
366 source_mem->num_bytes, 333 source_mem->size,
367 target_mem->num_bytes); 334 target_mem->size);
368 return -EINVAL; 335 return -EINVAL;
369 } 336 }
370 337
@@ -390,7 +357,7 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
390static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) 357static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
391{ 358{
392 struct gk20a *g = cde_ctx->g; 359 struct gk20a *g = cde_ctx->g;
393 struct gk20a_cde_mem_desc *target_mem; 360 struct mem_desc *target_mem;
394 u32 *target_mem_ptr; 361 u32 *target_mem_ptr;
395 u64 new_data; 362 u64 new_data;
396 int user_id = 0, i, err; 363 int user_id = 0, i, err;
@@ -398,7 +365,7 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
398 for (i = 0; i < cde_ctx->num_params; i++) { 365 for (i = 0; i < cde_ctx->num_params; i++) {
399 struct gk20a_cde_hdr_param *param = cde_ctx->params + i; 366 struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
400 target_mem = cde_ctx->mem + param->target_buf; 367 target_mem = cde_ctx->mem + param->target_buf;
401 target_mem_ptr = target_mem->cpuva; 368 target_mem_ptr = target_mem->cpu_va;
402 target_mem_ptr += (param->target_byte_offset / sizeof(u32)); 369 target_mem_ptr += (param->target_byte_offset / sizeof(u32));
403 370
404 switch (param->id) { 371 switch (param->id) {
@@ -472,7 +439,7 @@ static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
472 const struct firmware *img, 439 const struct firmware *img,
473 struct gk20a_cde_hdr_param *param) 440 struct gk20a_cde_hdr_param *param)
474{ 441{
475 struct gk20a_cde_mem_desc *target_mem; 442 struct mem_desc *target_mem;
476 443
477 if (param->target_buf >= cde_ctx->num_bufs) { 444 if (param->target_buf >= cde_ctx->num_bufs) {
478 gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u", 445 gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
@@ -482,10 +449,10 @@ static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
482 } 449 }
483 450
484 target_mem = cde_ctx->mem + param->target_buf; 451 target_mem = cde_ctx->mem + param->target_buf;
485 if (target_mem->num_bytes < (param->target_byte_offset + 3)) { 452 if (target_mem->size < (param->target_byte_offset + 3)) {
486 gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu", 453 gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
487 cde_ctx->num_params, param->target_byte_offset, 454 cde_ctx->num_params, param->target_byte_offset,
488 target_mem->num_bytes); 455 target_mem->size);
489 return -EINVAL; 456 return -EINVAL;
490 } 457 }
491 458
@@ -563,7 +530,7 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
563 530
564 gpfifo_elem = *gpfifo; 531 gpfifo_elem = *gpfifo;
565 for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) { 532 for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
566 struct gk20a_cde_mem_desc *target_mem; 533 struct mem_desc *target_mem;
567 534
568 /* validate the current entry */ 535 /* validate the current entry */
569 if (cmd_elem->target_buf >= cde_ctx->num_bufs) { 536 if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
@@ -573,10 +540,10 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
573 } 540 }
574 541
575 target_mem = cde_ctx->mem + cmd_elem->target_buf; 542 target_mem = cde_ctx->mem + cmd_elem->target_buf;
576 if (target_mem->num_bytes < 543 if (target_mem->size <
577 cmd_elem->target_byte_offset + cmd_elem->num_bytes) { 544 cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
578 gk20a_warn(&cde_ctx->pdev->dev, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)", 545 gk20a_warn(&cde_ctx->pdev->dev, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
579 target_mem->num_bytes, 546 target_mem->size,
580 cmd_elem->target_byte_offset, 547 cmd_elem->target_byte_offset,
581 cmd_elem->num_bytes); 548 cmd_elem->num_bytes);
582 return -EINVAL; 549 return -EINVAL;
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
index 58480d26..a5c75ae8 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
@@ -205,14 +205,6 @@ enum {
205 TYPE_ARRAY 205 TYPE_ARRAY
206}; 206};
207 207
208struct gk20a_cde_mem_desc {
209 struct sg_table *sgt;
210 dma_addr_t iova;
211 void *cpuva;
212 size_t num_bytes;
213 u64 gpu_va;
214};
215
216struct gk20a_cde_param { 208struct gk20a_cde_param {
217 u32 id; 209 u32 id;
218 u32 padding; 210 u32 padding;
@@ -228,7 +220,7 @@ struct gk20a_cde_ctx {
228 struct vm_gk20a *vm; 220 struct vm_gk20a *vm;
229 221
230 /* buf converter configuration */ 222 /* buf converter configuration */
231 struct gk20a_cde_mem_desc mem[MAX_CDE_BUFS]; 223 struct mem_desc mem[MAX_CDE_BUFS];
232 int num_bufs; 224 int num_bufs;
233 225
234 /* buffer patching params (where should patching be done) */ 226 /* buffer patching params (where should patching be done) */
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 62092930..9a0800d1 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -89,7 +89,7 @@ int channel_gk20a_commit_va(struct channel_gk20a *c)
89{ 89{
90 gk20a_dbg_fn(""); 90 gk20a_dbg_fn("");
91 91
92 if (!c->inst_block.cpuva) 92 if (!c->inst_block.cpu_va)
93 return -ENOMEM; 93 return -ENOMEM;
94 94
95 gk20a_init_inst_block(&c->inst_block, c->vm, 95 gk20a_init_inst_block(&c->inst_block, c->vm,
@@ -106,7 +106,7 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c)
106 106
107 gk20a_dbg_fn(""); 107 gk20a_dbg_fn("");
108 108
109 inst_ptr = c->inst_block.cpuva; 109 inst_ptr = c->inst_block.cpu_va;
110 if (!inst_ptr) 110 if (!inst_ptr)
111 return -ENOMEM; 111 return -ENOMEM;
112 112
@@ -134,7 +134,7 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
134 int shift = 3; 134 int shift = 3;
135 int value = timeslice_timeout; 135 int value = timeslice_timeout;
136 136
137 inst_ptr = c->inst_block.cpuva; 137 inst_ptr = c->inst_block.cpu_va;
138 if (!inst_ptr) 138 if (!inst_ptr)
139 return -ENOMEM; 139 return -ENOMEM;
140 140
@@ -177,7 +177,7 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
177 177
178 gk20a_dbg_fn(""); 178 gk20a_dbg_fn("");
179 179
180 inst_ptr = c->inst_block.cpuva; 180 inst_ptr = c->inst_block.cpu_va;
181 if (!inst_ptr) 181 if (!inst_ptr)
182 return -ENOMEM; 182 return -ENOMEM;
183 183
@@ -263,7 +263,7 @@ static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
263 struct fifo_engine_info_gk20a *engine_info = 263 struct fifo_engine_info_gk20a *engine_info =
264 f->engine_info + ENGINE_GR_GK20A; 264 f->engine_info + ENGINE_GR_GK20A;
265 265
266 u32 inst_ptr = ch_gk20a->inst_block.cpu_pa 266 u32 inst_ptr = sg_phys(ch_gk20a->inst_block.sgt->sgl)
267 >> ram_in_base_shift_v(); 267 >> ram_in_base_shift_v();
268 268
269 gk20a_dbg_info("bind channel %d inst ptr 0x%08x", 269 gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
@@ -322,7 +322,7 @@ int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
322 return err; 322 return err;
323 323
324 gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx", 324 gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
325 ch->hw_chid, (u64)ch->inst_block.cpu_pa); 325 ch->hw_chid, (u64)sg_phys(ch->inst_block.sgt->sgl));
326 326
327 gk20a_dbg_fn("done"); 327 gk20a_dbg_fn("done");
328 return 0; 328 return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 034de53f..ddb91f9b 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -99,7 +99,7 @@ struct channel_gk20a {
99 99
100 struct channel_ctx_gk20a ch_ctx; 100 struct channel_ctx_gk20a ch_ctx;
101 101
102 struct inst_desc inst_block; 102 struct mem_desc inst_block;
103 struct mem_desc_sub ramfc; 103 struct mem_desc_sub ramfc;
104 104
105 void *userd_cpu_va; 105 void *userd_cpu_va;
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 7cda9949..217f0056 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -851,7 +851,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
851 gk20a_writel(g, perf_pmasys_outsize_r(), virt_size); 851 gk20a_writel(g, perf_pmasys_outsize_r(), virt_size);
852 852
853 /* this field is aligned to 4K */ 853 /* this field is aligned to 4K */
854 inst_pa_page = g->mm.hwpm.inst_block.cpu_pa >> 12; 854 inst_pa_page = gk20a_mem_phys(&g->mm.hwpm.inst_block) >> 12;
855 855
856 /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK 856 /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
857 * should be written last */ 857 * should be written last */
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index 9dfab370..ace05c07 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -104,7 +104,7 @@ static void gk20a_debug_show_channel(struct gk20a *g,
104 u32 syncpointa, syncpointb; 104 u32 syncpointa, syncpointb;
105 void *inst_ptr; 105 void *inst_ptr;
106 106
107 inst_ptr = ch->inst_block.cpuva; 107 inst_ptr = ch->inst_block.cpu_va;
108 if (!inst_ptr) 108 if (!inst_ptr)
109 return; 109 return;
110 110
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index cf1242ab..dee58d0a 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -159,7 +159,6 @@ u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g)
159static void gk20a_remove_fifo_support(struct fifo_gk20a *f) 159static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
160{ 160{
161 struct gk20a *g = f->g; 161 struct gk20a *g = f->g;
162 struct device *d = dev_from_gk20a(g);
163 struct fifo_engine_info_gk20a *engine_info; 162 struct fifo_engine_info_gk20a *engine_info;
164 struct fifo_runlist_info_gk20a *runlist; 163 struct fifo_runlist_info_gk20a *runlist;
165 u32 runlist_id; 164 u32 runlist_id;
@@ -175,36 +174,14 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
175 } 174 }
176 kfree(f->channel); 175 kfree(f->channel);
177 } 176 }
178 if (f->userd.gpu_va) 177 gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
179 gk20a_gmmu_unmap(&g->mm.bar1.vm,
180 f->userd.gpu_va,
181 f->userd.size,
182 gk20a_mem_flag_none);
183
184 if (f->userd.sgt)
185 gk20a_free_sgtable(&f->userd.sgt);
186
187 if (f->userd.cpuva)
188 dma_free_coherent(d,
189 f->userd_total_size,
190 f->userd.cpuva,
191 f->userd.iova);
192 f->userd.cpuva = NULL;
193 f->userd.iova = 0;
194 178
195 engine_info = f->engine_info + ENGINE_GR_GK20A; 179 engine_info = f->engine_info + ENGINE_GR_GK20A;
196 runlist_id = engine_info->runlist_id; 180 runlist_id = engine_info->runlist_id;
197 runlist = &f->runlist_info[runlist_id]; 181 runlist = &f->runlist_info[runlist_id];
198 182
199 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { 183 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
200 if (runlist->mem[i].cpuva) 184 gk20a_gmmu_free(g, &runlist->mem[i]);
201 dma_free_coherent(d,
202 runlist->mem[i].size,
203 runlist->mem[i].cpuva,
204 runlist->mem[i].iova);
205 runlist->mem[i].cpuva = NULL;
206 runlist->mem[i].iova = 0;
207 }
208 185
209 kfree(runlist->active_channels); 186 kfree(runlist->active_channels);
210 kfree(runlist->active_tsgs); 187 kfree(runlist->active_tsgs);
@@ -327,19 +304,11 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
327 304
328 runlist_size = ram_rl_entry_size_v() * f->num_channels; 305 runlist_size = ram_rl_entry_size_v() * f->num_channels;
329 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { 306 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
330 dma_addr_t iova; 307 int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
331 308 if (err) {
332 runlist->mem[i].cpuva =
333 dma_alloc_coherent(d,
334 runlist_size,
335 &iova,
336 GFP_KERNEL);
337 if (!runlist->mem[i].cpuva) {
338 dev_err(d, "memory allocation failed\n"); 309 dev_err(d, "memory allocation failed\n");
339 goto clean_up_runlist; 310 goto clean_up_runlist;
340 } 311 }
341 runlist->mem[i].iova = iova;
342 runlist->mem[i].size = runlist_size;
343 } 312 }
344 mutex_init(&runlist->mutex); 313 mutex_init(&runlist->mutex);
345 314
@@ -351,15 +320,8 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
351 return 0; 320 return 0;
352 321
353clean_up_runlist: 322clean_up_runlist:
354 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { 323 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
355 if (runlist->mem[i].cpuva) 324 gk20a_gmmu_free(g, &runlist->mem[i]);
356 dma_free_coherent(d,
357 runlist->mem[i].size,
358 runlist->mem[i].cpuva,
359 runlist->mem[i].iova);
360 runlist->mem[i].cpuva = NULL;
361 runlist->mem[i].iova = 0;
362 }
363 325
364 kfree(runlist->active_channels); 326 kfree(runlist->active_channels);
365 runlist->active_channels = NULL; 327 runlist->active_channels = NULL;
@@ -502,7 +464,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
502 struct fifo_gk20a *f = &g->fifo; 464 struct fifo_gk20a *f = &g->fifo;
503 struct device *d = dev_from_gk20a(g); 465 struct device *d = dev_from_gk20a(g);
504 int chid, i, err = 0; 466 int chid, i, err = 0;
505 dma_addr_t iova;
506 467
507 gk20a_dbg_fn(""); 468 gk20a_dbg_fn("");
508 469
@@ -521,43 +482,17 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
521 f->max_engines = ENGINE_INVAL_GK20A; 482 f->max_engines = ENGINE_INVAL_GK20A;
522 483
523 f->userd_entry_size = 1 << ram_userd_base_shift_v(); 484 f->userd_entry_size = 1 << ram_userd_base_shift_v();
524 f->userd_total_size = f->userd_entry_size * f->num_channels;
525 485
526 f->userd.cpuva = dma_alloc_coherent(d, 486 err = gk20a_gmmu_alloc_map(&g->mm.bar1.vm,
527 f->userd_total_size, 487 f->userd_entry_size * f->num_channels,
528 &iova, 488 &f->userd);
529 GFP_KERNEL);
530 if (!f->userd.cpuva) {
531 dev_err(d, "memory allocation failed\n");
532 err = -ENOMEM;
533 goto clean_up;
534 }
535
536 f->userd.iova = iova;
537 err = gk20a_get_sgtable(d, &f->userd.sgt,
538 f->userd.cpuva, f->userd.iova,
539 f->userd_total_size);
540 if (err) { 489 if (err) {
541 dev_err(d, "failed to create sg table\n"); 490 dev_err(d, "memory allocation failed\n");
542 goto clean_up;
543 }
544
545 /* bar1 va */
546 f->userd.gpu_va = gk20a_gmmu_map(&g->mm.bar1.vm,
547 &f->userd.sgt,
548 f->userd_total_size,
549 0, /* flags */
550 gk20a_mem_flag_none);
551 if (!f->userd.gpu_va) {
552 dev_err(d, "gmmu mapping failed\n");
553 err = -ENOMEM;
554 goto clean_up; 491 goto clean_up;
555 } 492 }
556 493
557 gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); 494 gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
558 495
559 f->userd.size = f->userd_total_size;
560
561 f->channel = kzalloc(f->num_channels * sizeof(*f->channel), 496 f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
562 GFP_KERNEL); 497 GFP_KERNEL);
563 f->tsg = kzalloc(f->num_channels * sizeof(*f->tsg), 498 f->tsg = kzalloc(f->num_channels * sizeof(*f->tsg),
@@ -582,9 +517,9 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
582 517
583 for (chid = 0; chid < f->num_channels; chid++) { 518 for (chid = 0; chid < f->num_channels; chid++) {
584 f->channel[chid].userd_cpu_va = 519 f->channel[chid].userd_cpu_va =
585 f->userd.cpuva + chid * f->userd_entry_size; 520 f->userd.cpu_va + chid * f->userd_entry_size;
586 f->channel[chid].userd_iova = 521 f->channel[chid].userd_iova =
587 gk20a_mm_smmu_vaddr_translate(g, f->userd.iova) 522 gk20a_mm_iova_addr(g, f->userd.sgt->sgl)
588 + chid * f->userd_entry_size; 523 + chid * f->userd_entry_size;
589 f->channel[chid].userd_gpu_va = 524 f->channel[chid].userd_gpu_va =
590 f->userd.gpu_va + chid * f->userd_entry_size; 525 f->userd.gpu_va + chid * f->userd_entry_size;
@@ -607,22 +542,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
607 542
608clean_up: 543clean_up:
609 gk20a_dbg_fn("fail"); 544 gk20a_dbg_fn("fail");
610 if (f->userd.gpu_va) 545 gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
611 gk20a_gmmu_unmap(&g->mm.bar1.vm,
612 f->userd.gpu_va,
613 f->userd.size,
614 gk20a_mem_flag_none);
615 if (f->userd.sgt)
616 gk20a_free_sgtable(&f->userd.sgt);
617 if (f->userd.cpuva)
618 dma_free_coherent(d,
619 f->userd_total_size,
620 f->userd.cpuva,
621 f->userd.iova);
622 f->userd.cpuva = NULL;
623 f->userd.iova = 0;
624
625 memset(&f->userd, 0, sizeof(struct userd_desc));
626 546
627 kfree(f->channel); 547 kfree(f->channel);
628 f->channel = NULL; 548 f->channel = NULL;
@@ -657,7 +577,7 @@ static int gk20a_init_fifo_setup_hw(struct gk20a *g)
657 u32 v, v1 = 0x33, v2 = 0x55; 577 u32 v, v1 = 0x33, v2 = 0x55;
658 578
659 u32 bar1_vaddr = f->userd.gpu_va; 579 u32 bar1_vaddr = f->userd.gpu_va;
660 volatile u32 *cpu_vaddr = f->userd.cpuva; 580 volatile u32 *cpu_vaddr = f->userd.cpu_va;
661 581
662 gk20a_dbg_info("test bar1 @ vaddr 0x%x", 582 gk20a_dbg_info("test bar1 @ vaddr 0x%x",
663 bar1_vaddr); 583 bar1_vaddr);
@@ -725,8 +645,8 @@ channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
725 return NULL; 645 return NULL;
726 for (ci = 0; ci < f->num_channels; ci++) { 646 for (ci = 0; ci < f->num_channels; ci++) {
727 struct channel_gk20a *c = f->channel+ci; 647 struct channel_gk20a *c = f->channel+ci;
728 if (c->inst_block.cpuva && 648 if (c->inst_block.cpu_va &&
729 (inst_ptr == c->inst_block.cpu_pa)) 649 (inst_ptr == sg_phys(c->inst_block.sgt->sgl)))
730 return f->channel+ci; 650 return f->channel+ci;
731 } 651 }
732 return NULL; 652 return NULL;
@@ -1082,10 +1002,10 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
1082 gk20a_fifo_set_ctx_mmu_error_ch(g, ch); 1002 gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
1083 gk20a_channel_abort(ch); 1003 gk20a_channel_abort(ch);
1084 } else if (f.inst_ptr == 1004 } else if (f.inst_ptr ==
1085 g->mm.bar1.inst_block.cpu_pa) { 1005 sg_phys(g->mm.bar1.inst_block.sgt->sgl)) {
1086 gk20a_err(dev_from_gk20a(g), "mmu fault from bar1"); 1006 gk20a_err(dev_from_gk20a(g), "mmu fault from bar1");
1087 } else if (f.inst_ptr == 1007 } else if (f.inst_ptr ==
1088 g->mm.pmu.inst_block.cpu_pa) { 1008 sg_phys(g->mm.pmu.inst_block.sgt->sgl)) {
1089 gk20a_err(dev_from_gk20a(g), "mmu fault from pmu"); 1009 gk20a_err(dev_from_gk20a(g), "mmu fault from pmu");
1090 } else 1010 } else
1091 gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault"); 1011 gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault");
@@ -1893,7 +1813,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
1893 bool wait_for_finish) 1813 bool wait_for_finish)
1894{ 1814{
1895 u32 ret = 0; 1815 u32 ret = 0;
1896 struct device *d = dev_from_gk20a(g);
1897 struct fifo_gk20a *f = &g->fifo; 1816 struct fifo_gk20a *f = &g->fifo;
1898 struct fifo_runlist_info_gk20a *runlist = NULL; 1817 struct fifo_runlist_info_gk20a *runlist = NULL;
1899 u32 *runlist_entry_base = NULL; 1818 u32 *runlist_entry_base = NULL;
@@ -1935,15 +1854,15 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
1935 new_buf = !runlist->cur_buffer; 1854 new_buf = !runlist->cur_buffer;
1936 1855
1937 gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx", 1856 gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx",
1938 runlist_id, runlist->mem[new_buf].iova); 1857 runlist_id, (u64)gk20a_mem_phys(&runlist->mem[new_buf]));
1939 1858
1940 runlist_pa = gk20a_get_phys_from_iova(d, runlist->mem[new_buf].iova); 1859 runlist_pa = gk20a_mem_phys(&runlist->mem[new_buf]);
1941 if (!runlist_pa) { 1860 if (!runlist_pa) {
1942 ret = -EINVAL; 1861 ret = -EINVAL;
1943 goto clean_up; 1862 goto clean_up;
1944 } 1863 }
1945 1864
1946 runlist_entry_base = runlist->mem[new_buf].cpuva; 1865 runlist_entry_base = runlist->mem[new_buf].cpu_va;
1947 if (!runlist_entry_base) { 1866 if (!runlist_entry_base) {
1948 ret = -ENOMEM; 1867 ret = -ENOMEM;
1949 goto clean_up; 1868 goto clean_up;
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 4ff1398a..dd320ae1 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -32,7 +32,7 @@ struct fifo_runlist_info_gk20a {
32 unsigned long *active_channels; 32 unsigned long *active_channels;
33 unsigned long *active_tsgs; 33 unsigned long *active_tsgs;
34 /* Each engine has its own SW and HW runlist buffer.*/ 34 /* Each engine has its own SW and HW runlist buffer.*/
35 struct runlist_mem_desc mem[MAX_RUNLIST_BUFFERS]; 35 struct mem_desc mem[MAX_RUNLIST_BUFFERS];
36 u32 cur_buffer; 36 u32 cur_buffer;
37 u32 total_entries; 37 u32 total_entries;
38 bool stopped; 38 bool stopped;
@@ -102,9 +102,8 @@ struct fifo_gk20a {
102 struct fifo_runlist_info_gk20a *runlist_info; 102 struct fifo_runlist_info_gk20a *runlist_info;
103 u32 max_runlists; 103 u32 max_runlists;
104 104
105 struct userd_desc userd; 105 struct mem_desc userd;
106 u32 userd_entry_size; 106 u32 userd_entry_size;
107 u32 userd_total_size;
108 107
109 struct channel_gk20a *channel; 108 struct channel_gk20a *channel;
110 struct mutex ch_inuse_mutex; /* protect unused chid look up */ 109 struct mutex ch_inuse_mutex; /* protect unused chid look up */
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index e9b209c4..a160942f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -567,7 +567,7 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
567 567
568 gk20a_dbg_fn(""); 568 gk20a_dbg_fn("");
569 569
570 inst_ptr = c->inst_block.cpuva; 570 inst_ptr = c->inst_block.cpu_va;
571 if (!inst_ptr) 571 if (!inst_ptr)
572 return -ENOMEM; 572 return -ENOMEM;
573 573
@@ -674,7 +674,7 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
674static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, 674static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
675 struct channel_gk20a *c) 675 struct channel_gk20a *c)
676{ 676{
677 u32 inst_base_ptr = u64_lo32(c->inst_block.cpu_pa 677 u32 inst_base_ptr = u64_lo32(gk20a_mem_phys(&c->inst_block)
678 >> ram_in_base_shift_v()); 678 >> ram_in_base_shift_v());
679 u32 ret; 679 u32 ret;
680 680
@@ -1375,7 +1375,7 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
1375 int ret; 1375 int ret;
1376 1376
1377 u32 inst_base_ptr = 1377 u32 inst_base_ptr =
1378 u64_lo32(c->inst_block.cpu_pa 1378 u64_lo32(gk20a_mem_phys(&c->inst_block)
1379 >> ram_in_base_shift_v()); 1379 >> ram_in_base_shift_v());
1380 1380
1381 1381
@@ -1671,7 +1671,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1671 1671
1672 if (tegra_platform_is_linsim()) { 1672 if (tegra_platform_is_linsim()) {
1673 u32 inst_base_ptr = 1673 u32 inst_base_ptr =
1674 u64_lo32(c->inst_block.cpu_pa 1674 u64_lo32(gk20a_mem_phys(&c->inst_block)
1675 >> ram_in_base_shift_v()); 1675 >> ram_in_base_shift_v());
1676 1676
1677 ret = gr_gk20a_submit_fecs_method_op(g, 1677 ret = gr_gk20a_submit_fecs_method_op(g,
@@ -1729,12 +1729,12 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
1729 gk20a_init_inst_block(&ucode_info->inst_blk_desc, vm, 0); 1729 gk20a_init_inst_block(&ucode_info->inst_blk_desc, vm, 0);
1730 1730
1731 /* Map ucode surface to GMMU */ 1731 /* Map ucode surface to GMMU */
1732 ucode_info->ucode_gpuva = gk20a_gmmu_map(vm, 1732 ucode_info->surface_desc.gpu_va = gk20a_gmmu_map(vm,
1733 &ucode_info->surface_desc.sgt, 1733 &ucode_info->surface_desc.sgt,
1734 ucode_info->surface_desc.size, 1734 ucode_info->surface_desc.size,
1735 0, /* flags */ 1735 0, /* flags */
1736 gk20a_mem_flag_read_only); 1736 gk20a_mem_flag_read_only);
1737 if (!ucode_info->ucode_gpuva) { 1737 if (!ucode_info->surface_desc.gpu_va) {
1738 gk20a_err(d, "failed to update gmmu ptes\n"); 1738 gk20a_err(d, "failed to update gmmu ptes\n");
1739 return -ENOMEM; 1739 return -ENOMEM;
1740 } 1740 }
@@ -1798,8 +1798,6 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
1798 u8 *buf; 1798 u8 *buf;
1799 u32 ucode_size; 1799 u32 ucode_size;
1800 int err = 0; 1800 int err = 0;
1801 dma_addr_t iova;
1802 DEFINE_DMA_ATTRS(attrs);
1803 1801
1804 fecs_fw = gk20a_request_firmware(g, GK20A_FECS_UCODE_IMAGE); 1802 fecs_fw = gk20a_request_firmware(g, GK20A_FECS_UCODE_IMAGE);
1805 if (!fecs_fw) { 1803 if (!fecs_fw) {
@@ -1832,30 +1830,12 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
1832 g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32), 1830 g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32),
1833 g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); 1831 g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32));
1834 1832
1835 ucode_info->surface_desc.size = ucode_size; 1833 err = gk20a_gmmu_alloc_attr(g, DMA_ATTR_READ_ONLY, ucode_size,
1836 dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); 1834 &ucode_info->surface_desc);
1837 ucode_info->surface_desc.cpuva = dma_alloc_attrs(d, 1835 if (err)
1838 ucode_info->surface_desc.size,
1839 &iova,
1840 GFP_KERNEL,
1841 &attrs);
1842 if (!ucode_info->surface_desc.cpuva) {
1843 gk20a_err(d, "memory allocation failed\n");
1844 err = -ENOMEM;
1845 goto clean_up;
1846 }
1847
1848 ucode_info->surface_desc.iova = iova;
1849 err = gk20a_get_sgtable(d, &ucode_info->surface_desc.sgt,
1850 ucode_info->surface_desc.cpuva,
1851 ucode_info->surface_desc.iova,
1852 ucode_info->surface_desc.size);
1853 if (err) {
1854 gk20a_err(d, "failed to create sg table\n");
1855 goto clean_up; 1836 goto clean_up;
1856 }
1857 1837
1858 buf = (u8 *)ucode_info->surface_desc.cpuva; 1838 buf = (u8 *)ucode_info->surface_desc.cpu_va;
1859 if (!buf) { 1839 if (!buf) {
1860 gk20a_err(d, "failed to map surface desc buffer"); 1840 gk20a_err(d, "failed to map surface desc buffer");
1861 err = -ENOMEM; 1841 err = -ENOMEM;
@@ -1882,23 +1862,13 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
1882 if (err) 1862 if (err)
1883 goto clean_up; 1863 goto clean_up;
1884 1864
1885 gk20a_free_sgtable(&ucode_info->surface_desc.sgt);
1886
1887 return 0; 1865 return 0;
1888 1866
1889 clean_up: 1867 clean_up:
1890 if (ucode_info->ucode_gpuva) 1868 if (ucode_info->surface_desc.gpu_va)
1891 gk20a_gmmu_unmap(vm, ucode_info->ucode_gpuva, 1869 gk20a_gmmu_unmap(vm, ucode_info->surface_desc.gpu_va,
1892 ucode_info->surface_desc.size, gk20a_mem_flag_none); 1870 ucode_info->surface_desc.size, gk20a_mem_flag_none);
1893 if (ucode_info->surface_desc.sgt) 1871 gk20a_gmmu_free(g, &ucode_info->surface_desc);
1894 gk20a_free_sgtable(&ucode_info->surface_desc.sgt);
1895 if (ucode_info->surface_desc.cpuva)
1896 dma_free_attrs(d, ucode_info->surface_desc.size,
1897 ucode_info->surface_desc.cpuva,
1898 ucode_info->surface_desc.iova,
1899 &attrs);
1900 ucode_info->surface_desc.cpuva = NULL;
1901 ucode_info->surface_desc.iova = 0;
1902 1872
1903 release_firmware(gpccs_fw); 1873 release_firmware(gpccs_fw);
1904 gpccs_fw = NULL; 1874 gpccs_fw = NULL;
@@ -1928,7 +1898,7 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
1928 1898
1929 gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); 1899 gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
1930 1900
1931 inst_ptr = ucode_info->inst_blk_desc.cpu_pa; 1901 inst_ptr = gk20a_mem_phys(&ucode_info->inst_blk_desc);
1932 gk20a_writel(g, gr_fecs_new_ctx_r(), 1902 gk20a_writel(g, gr_fecs_new_ctx_r(),
1933 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | 1903 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
1934 gr_fecs_new_ctx_target_m() | 1904 gr_fecs_new_ctx_target_m() |
@@ -2111,7 +2081,7 @@ static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
2111static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) 2081static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
2112{ 2082{
2113 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; 2083 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2114 u64 addr_base = ucode_info->ucode_gpuva; 2084 u64 addr_base = ucode_info->surface_desc.gpu_va;
2115 2085
2116 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0); 2086 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0);
2117 2087
@@ -2128,6 +2098,7 @@ static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
2128 2098
2129int gr_gk20a_load_ctxsw_ucode(struct gk20a *g) 2099int gr_gk20a_load_ctxsw_ucode(struct gk20a *g)
2130{ 2100{
2101 int err;
2131 2102
2132 gk20a_dbg_fn(""); 2103 gk20a_dbg_fn("");
2133 2104
@@ -2147,8 +2118,12 @@ int gr_gk20a_load_ctxsw_ucode(struct gk20a *g)
2147 gr_gk20a_load_falcon_imem(g); 2118 gr_gk20a_load_falcon_imem(g);
2148 gr_gk20a_start_falcon_ucode(g); 2119 gr_gk20a_start_falcon_ucode(g);
2149 } else { 2120 } else {
2150 if (!g->gr.skip_ucode_init) 2121 if (!g->gr.skip_ucode_init) {
2151 gr_gk20a_init_ctxsw_ucode(g); 2122 err = gr_gk20a_init_ctxsw_ucode(g);
2123
2124 if (err)
2125 return err;
2126 }
2152 gr_gk20a_load_falcon_with_bootloader(g); 2127 gr_gk20a_load_falcon_with_bootloader(g);
2153 g->gr.skip_ucode_init = true; 2128 g->gr.skip_ucode_init = true;
2154 } 2129 }
@@ -2976,21 +2951,13 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
2976 2951
2977 gr_gk20a_free_global_ctx_buffers(g); 2952 gr_gk20a_free_global_ctx_buffers(g);
2978 2953
2979 dma_free_coherent(d, gr->mmu_wr_mem.size, 2954 gk20a_gmmu_free(g, &gr->mmu_wr_mem);
2980 gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova); 2955 gk20a_gmmu_free(g, &gr->mmu_rd_mem);
2981 gr->mmu_wr_mem.cpuva = NULL;
2982 gr->mmu_wr_mem.iova = 0;
2983 dma_free_coherent(d, gr->mmu_rd_mem.size,
2984 gr->mmu_rd_mem.cpuva, gr->mmu_rd_mem.iova);
2985 gr->mmu_rd_mem.cpuva = NULL;
2986 gr->mmu_rd_mem.iova = 0;
2987 2956
2988 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); 2957 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
2989 dma_free_attrs(d, gr->compbit_store.size, gr->compbit_store.pages, 2958 dma_free_attrs(d, gr->compbit_store.size, gr->compbit_store.pages,
2990 gr->compbit_store.base_iova, &attrs); 2959 gr->compbit_store.base_iova, &attrs);
2991 2960
2992 memset(&gr->mmu_wr_mem, 0, sizeof(struct mmu_desc));
2993 memset(&gr->mmu_rd_mem, 0, sizeof(struct mmu_desc));
2994 memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc)); 2961 memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
2995 2962
2996 kfree(gr->gpc_tpc_count); 2963 kfree(gr->gpc_tpc_count);
@@ -3234,33 +3201,19 @@ clean_up:
3234 3201
3235static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr) 3202static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr)
3236{ 3203{
3237 struct device *d = dev_from_gk20a(g); 3204 int err;
3238 dma_addr_t iova;
3239
3240 gr->mmu_wr_mem_size = gr->mmu_rd_mem_size = 0x1000;
3241 3205
3242 gr->mmu_wr_mem.size = gr->mmu_wr_mem_size; 3206 err = gk20a_gmmu_alloc(g, 0x1000, &gr->mmu_wr_mem);
3243 gr->mmu_wr_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_wr_mem_size, 3207 if (err)
3244 &iova, GFP_KERNEL);
3245 if (!gr->mmu_wr_mem.cpuva)
3246 goto err; 3208 goto err;
3247 3209
3248 gr->mmu_wr_mem.iova = iova; 3210 err = gk20a_gmmu_alloc(g, 0x1000, &gr->mmu_rd_mem);
3249 3211 if (err)
3250 gr->mmu_rd_mem.size = gr->mmu_rd_mem_size;
3251 gr->mmu_rd_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_rd_mem_size,
3252 &iova, GFP_KERNEL);
3253 if (!gr->mmu_rd_mem.cpuva)
3254 goto err_free_wr_mem; 3212 goto err_free_wr_mem;
3255
3256 gr->mmu_rd_mem.iova = iova;
3257 return 0; 3213 return 0;
3258 3214
3259 err_free_wr_mem: 3215 err_free_wr_mem:
3260 dma_free_coherent(d, gr->mmu_wr_mem.size, 3216 gk20a_gmmu_free(g, &gr->mmu_wr_mem);
3261 gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova);
3262 gr->mmu_wr_mem.cpuva = NULL;
3263 gr->mmu_wr_mem.iova = 0;
3264 err: 3217 err:
3265 return -ENOMEM; 3218 return -ENOMEM;
3266} 3219}
@@ -4241,7 +4194,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4241 gk20a_dbg_fn(""); 4194 gk20a_dbg_fn("");
4242 4195
4243 /* init mmu debug buffer */ 4196 /* init mmu debug buffer */
4244 addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_wr_mem.iova); 4197 addr = gk20a_mm_iova_addr(g, gr->mmu_wr_mem.sgt->sgl);
4245 addr >>= fb_mmu_debug_wr_addr_alignment_v(); 4198 addr >>= fb_mmu_debug_wr_addr_alignment_v();
4246 4199
4247 gk20a_writel(g, fb_mmu_debug_wr_r(), 4200 gk20a_writel(g, fb_mmu_debug_wr_r(),
@@ -4249,7 +4202,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4249 fb_mmu_debug_wr_vol_false_f() | 4202 fb_mmu_debug_wr_vol_false_f() |
4250 fb_mmu_debug_wr_addr_f(addr)); 4203 fb_mmu_debug_wr_addr_f(addr));
4251 4204
4252 addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_rd_mem.iova); 4205 addr = gk20a_mm_iova_addr(g, gr->mmu_rd_mem.sgt->sgl);
4253 addr >>= fb_mmu_debug_rd_addr_alignment_v(); 4206 addr >>= fb_mmu_debug_rd_addr_alignment_v();
4254 4207
4255 gk20a_writel(g, fb_mmu_debug_rd_r(), 4208 gk20a_writel(g, fb_mmu_debug_rd_r(),
@@ -4651,8 +4604,6 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
4651 int err = 0; 4604 int err = 0;
4652 4605
4653 u32 size; 4606 u32 size;
4654 struct sg_table *sgt_pg_buf;
4655 dma_addr_t iova;
4656 4607
4657 gk20a_dbg_fn(""); 4608 gk20a_dbg_fn("");
4658 4609
@@ -4665,50 +4616,24 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
4665 return err; 4616 return err;
4666 } 4617 }
4667 4618
4668 if (!pmu->pg_buf.cpuva) { 4619 if (!pmu->pg_buf.cpu_va) {
4669 pmu->pg_buf.cpuva = dma_alloc_coherent(d, size, 4620 err = gk20a_gmmu_alloc_map(vm, size, &pmu->pg_buf);
4670 &iova, 4621 if (err) {
4671 GFP_KERNEL);
4672 if (!pmu->pg_buf.cpuva) {
4673 gk20a_err(d, "failed to allocate memory\n"); 4622 gk20a_err(d, "failed to allocate memory\n");
4674 return -ENOMEM; 4623 return -ENOMEM;
4675 } 4624 }
4676
4677 pmu->pg_buf.iova = iova;
4678 pmu->pg_buf.size = size;
4679
4680 err = gk20a_get_sgtable(d, &sgt_pg_buf,
4681 pmu->pg_buf.cpuva,
4682 pmu->pg_buf.iova,
4683 size);
4684 if (err) {
4685 gk20a_err(d, "failed to create sg table\n");
4686 goto err_free_pg_buf;
4687 }
4688
4689 pmu->pg_buf.pmu_va = gk20a_gmmu_map(vm,
4690 &sgt_pg_buf,
4691 size,
4692 0, /* flags */
4693 gk20a_mem_flag_none);
4694 if (!pmu->pg_buf.pmu_va) {
4695 gk20a_err(d, "failed to map fecs pg buffer");
4696 err = -ENOMEM;
4697 goto err_free_sgtable;
4698 }
4699
4700 gk20a_free_sgtable(&sgt_pg_buf);
4701 } 4625 }
4702 4626
4703 4627
4704 err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa); 4628 err = gr_gk20a_fecs_set_reglist_bind_inst(g,
4629 gk20a_mem_phys(&mm->pmu.inst_block));
4705 if (err) { 4630 if (err) {
4706 gk20a_err(dev_from_gk20a(g), 4631 gk20a_err(dev_from_gk20a(g),
4707 "fail to bind pmu inst to gr"); 4632 "fail to bind pmu inst to gr");
4708 return err; 4633 return err;
4709 } 4634 }
4710 4635
4711 err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.pmu_va); 4636 err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.gpu_va);
4712 if (err) { 4637 if (err) {
4713 gk20a_err(dev_from_gk20a(g), 4638 gk20a_err(dev_from_gk20a(g),
4714 "fail to set pg buffer pmu va"); 4639 "fail to set pg buffer pmu va");
@@ -4716,15 +4641,6 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
4716 } 4641 }
4717 4642
4718 return err; 4643 return err;
4719
4720err_free_sgtable:
4721 gk20a_free_sgtable(&sgt_pg_buf);
4722err_free_pg_buf:
4723 dma_free_coherent(d, size,
4724 pmu->pg_buf.cpuva, pmu->pg_buf.iova);
4725 pmu->pg_buf.cpuva = NULL;
4726 pmu->pg_buf.iova = 0;
4727 return err;
4728} 4644}
4729 4645
4730int gk20a_init_gr_support(struct gk20a *g) 4646int gk20a_init_gr_support(struct gk20a *g)
@@ -4983,14 +4899,14 @@ int gk20a_gr_reset(struct gk20a *g)
4983 } 4899 }
4984 4900
4985 err = gr_gk20a_fecs_set_reglist_bind_inst(g, 4901 err = gr_gk20a_fecs_set_reglist_bind_inst(g,
4986 g->mm.pmu.inst_block.cpu_pa); 4902 gk20a_mem_phys(&g->mm.pmu.inst_block));
4987 if (err) { 4903 if (err) {
4988 gk20a_err(dev_from_gk20a(g), 4904 gk20a_err(dev_from_gk20a(g),
4989 "fail to bind pmu inst to gr"); 4905 "fail to bind pmu inst to gr");
4990 return err; 4906 return err;
4991 } 4907 }
4992 4908
4993 err = gr_gk20a_fecs_set_reglist_virtual_addr(g, g->pmu.pg_buf.pmu_va); 4909 err = gr_gk20a_fecs_set_reglist_virtual_addr(g, g->pmu.pg_buf.gpu_va);
4994 if (err) { 4910 if (err) {
4995 gk20a_err(dev_from_gk20a(g), 4911 gk20a_err(dev_from_gk20a(g),
4996 "fail to set pg buffer pmu va"); 4912 "fail to set pg buffer pmu va");
@@ -5357,7 +5273,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx,
5357 /* slow path */ 5273 /* slow path */
5358 for (chid = 0; chid < f->num_channels; chid++) 5274 for (chid = 0; chid < f->num_channels; chid++)
5359 if (f->channel[chid].in_use) { 5275 if (f->channel[chid].in_use) {
5360 if ((u32)(f->channel[chid].inst_block.cpu_pa >> 5276 if ((u32)(gk20a_mem_phys(&f->channel[chid].inst_block) >>
5361 ram_in_base_shift_v()) == 5277 ram_in_base_shift_v()) ==
5362 gr_fecs_current_ctx_ptr_v(curr_ctx)) { 5278 gr_fecs_current_ctx_ptr_v(curr_ctx)) {
5363 tsgid = f->channel[chid].tsgid; 5279 tsgid = f->channel[chid].tsgid;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 5dfaac5f..81615e0f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -256,10 +256,8 @@ struct gr_gk20a {
256 256
257 struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF]; 257 struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
258 258
259 struct mmu_desc mmu_wr_mem; 259 struct mem_desc mmu_wr_mem;
260 u32 mmu_wr_mem_size; 260 struct mem_desc mmu_rd_mem;
261 struct mmu_desc mmu_rd_mem;
262 u32 mmu_rd_mem_size;
263 261
264 u8 *map_tiles; 262 u8 *map_tiles;
265 u32 map_tile_count; 263 u32 map_tile_count;
@@ -336,9 +334,8 @@ struct gk20a_ctxsw_ucode_segments {
336 334
337struct gk20a_ctxsw_ucode_info { 335struct gk20a_ctxsw_ucode_info {
338 u64 *p_va; 336 u64 *p_va;
339 struct inst_desc inst_blk_desc; 337 struct mem_desc inst_blk_desc;
340 struct surface_mem_desc surface_desc; 338 struct mem_desc surface_desc;
341 u64 ucode_gpuva;
342 struct gk20a_ctxsw_ucode_segments fecs; 339 struct gk20a_ctxsw_ucode_segments fecs;
343 struct gk20a_ctxsw_ucode_segments gpccs; 340 struct gk20a_ctxsw_ucode_segments gpccs;
344}; 341};
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index c3895a53..954249c6 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -268,7 +268,7 @@ static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
268 return 0; 268 return 0;
269} 269}
270 270
271static void gk20a_remove_vm(struct vm_gk20a *vm, struct inst_desc *inst_block) 271static void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block)
272{ 272{
273 struct gk20a *g = vm->mm->g; 273 struct gk20a *g = vm->mm->g;
274 274
@@ -335,8 +335,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
335int gk20a_init_mm_setup_hw(struct gk20a *g) 335int gk20a_init_mm_setup_hw(struct gk20a *g)
336{ 336{
337 struct mm_gk20a *mm = &g->mm; 337 struct mm_gk20a *mm = &g->mm;
338 struct inst_desc *inst_block = &mm->bar1.inst_block; 338 struct mem_desc *inst_block = &mm->bar1.inst_block;
339 phys_addr_t inst_pa = inst_block->cpu_pa; 339 phys_addr_t inst_pa = gk20a_mem_phys(inst_block);
340 int err; 340 int err;
341 341
342 gk20a_dbg_fn(""); 342 gk20a_dbg_fn("");
@@ -1516,54 +1516,95 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
1516 return vaddr; 1516 return vaddr;
1517} 1517}
1518 1518
1519int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, 1519int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct mem_desc *mem)
1520 size_t size, struct mem_desc *mem) 1520{
1521 return gk20a_gmmu_alloc_attr(g, 0, size, mem);
1522}
1523
1524int gk20a_gmmu_alloc_attr(struct gk20a *g, enum dma_attr attr, size_t size, struct mem_desc *mem)
1521{ 1525{
1522 struct gk20a *g = vm->mm->g;
1523 struct device *d = dev_from_gk20a(g); 1526 struct device *d = dev_from_gk20a(g);
1524 int err; 1527 int err;
1525 struct sg_table *sgt; 1528 dma_addr_t iova;
1529
1530 gk20a_dbg_fn("");
1531
1532 if (attr) {
1533 DEFINE_DMA_ATTRS(attrs);
1534 dma_set_attr(attr, &attrs);
1535 mem->cpu_va =
1536 dma_alloc_attrs(d, size, &iova, GFP_KERNEL, &attrs);
1537 } else {
1538 mem->cpu_va = dma_alloc_coherent(d, size, &iova, GFP_KERNEL);
1539 }
1526 1540
1527 mem->cpu_va = dma_alloc_coherent(d, size, &mem->iova, GFP_KERNEL);
1528 if (!mem->cpu_va) 1541 if (!mem->cpu_va)
1529 return -ENOMEM; 1542 return -ENOMEM;
1530 1543
1531 err = gk20a_get_sgtable(d, &sgt, mem->cpu_va, mem->iova, size); 1544 err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size);
1532 if (err) 1545 if (err)
1533 goto fail_free; 1546 goto fail_free;
1534 1547
1535 mem->gpu_va = gk20a_gmmu_map(vm, &sgt, size, 0, gk20a_mem_flag_none); 1548 mem->size = size;
1536 gk20a_free_sgtable(&sgt); 1549 memset(mem->cpu_va, 0, size);
1550
1551 gk20a_dbg_fn("done");
1552
1553 return 0;
1554
1555fail_free:
1556 dma_free_coherent(d, size, mem->cpu_va, iova);
1557 mem->cpu_va = NULL;
1558 mem->sgt = NULL;
1559 return err;
1560}
1561
1562void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem)
1563{
1564 struct device *d = dev_from_gk20a(g);
1565
1566 if (mem->cpu_va)
1567 dma_free_coherent(d, mem->size, mem->cpu_va,
1568 sg_dma_address(mem->sgt->sgl));
1569 mem->cpu_va = NULL;
1570
1571 if (mem->sgt)
1572 gk20a_free_sgtable(&mem->sgt);
1573}
1574
1575int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem)
1576{
1577 return gk20a_gmmu_alloc_map_attr(vm, 0, size, mem);
1578}
1579
1580int gk20a_gmmu_alloc_map_attr(struct vm_gk20a *vm,
1581 enum dma_attr attr, size_t size, struct mem_desc *mem)
1582{
1583 int err = gk20a_gmmu_alloc_attr(vm->mm->g, attr, size, mem);
1584
1585 if (err)
1586 return err;
1587
1588 mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, gk20a_mem_flag_none);
1537 if (!mem->gpu_va) { 1589 if (!mem->gpu_va) {
1538 err = -ENOMEM; 1590 err = -ENOMEM;
1539 goto fail_free; 1591 goto fail_free;
1540 } 1592 }
1541 1593
1542 mem->size = size;
1543
1544 return 0; 1594 return 0;
1545 1595
1546fail_free: 1596fail_free:
1547 dma_free_coherent(d, size, mem->cpu_va, mem->iova); 1597 gk20a_gmmu_free(vm->mm->g, mem);
1548 mem->cpu_va = NULL;
1549 mem->iova = 0;
1550
1551 return err; 1598 return err;
1552} 1599}
1553 1600
1554void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct mem_desc *mem) 1601void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct mem_desc *mem)
1555{ 1602{
1556 struct gk20a *g = vm->mm->g;
1557 struct device *d = dev_from_gk20a(g);
1558
1559 if (mem->gpu_va) 1603 if (mem->gpu_va)
1560 gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size, gk20a_mem_flag_none); 1604 gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size, gk20a_mem_flag_none);
1561 mem->gpu_va = 0; 1605 mem->gpu_va = 0;
1562 1606
1563 if (mem->cpu_va) 1607 gk20a_gmmu_free(vm->mm->g, mem);
1564 dma_free_coherent(d, mem->size, mem->cpu_va, mem->iova);
1565 mem->cpu_va = NULL;
1566 mem->iova = 0;
1567} 1608}
1568 1609
1569dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) 1610dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
@@ -2644,42 +2685,24 @@ void gk20a_deinit_vm(struct vm_gk20a *vm)
2644 kfree(vm->pdb.entries); 2685 kfree(vm->pdb.entries);
2645} 2686}
2646 2687
2647int gk20a_alloc_inst_block(struct gk20a *g, struct inst_desc *inst_block) 2688int gk20a_alloc_inst_block(struct gk20a *g, struct mem_desc *inst_block)
2648{ 2689{
2649 struct device *dev = dev_from_gk20a(g); 2690 struct device *dev = dev_from_gk20a(g);
2650 dma_addr_t iova; 2691 int err;
2651 2692
2652 inst_block->size = ram_in_alloc_size_v(); 2693 err = gk20a_gmmu_alloc(g, ram_in_alloc_size_v(), inst_block);
2653 inst_block->cpuva = dma_alloc_coherent(dev, inst_block->size, 2694 if (err) {
2654 &iova, GFP_KERNEL);
2655 if (!inst_block->cpuva) {
2656 gk20a_err(dev, "%s: memory allocation failed\n", __func__); 2695 gk20a_err(dev, "%s: memory allocation failed\n", __func__);
2657 return -ENOMEM; 2696 return err;
2658 }
2659
2660 inst_block->iova = iova;
2661 inst_block->cpu_pa = gk20a_get_phys_from_iova(dev, inst_block->iova);
2662 if (!inst_block->cpu_pa) {
2663 gk20a_err(dev, "%s: failed to get phys address\n", __func__);
2664 gk20a_free_inst_block(g, inst_block);
2665 return -ENOMEM;
2666 } 2697 }
2667 2698
2668 memset(inst_block->cpuva, 0, inst_block->size);
2669
2670 return 0; 2699 return 0;
2671} 2700}
2672 2701
2673void gk20a_free_inst_block(struct gk20a *g, struct inst_desc *inst_block) 2702void gk20a_free_inst_block(struct gk20a *g, struct mem_desc *inst_block)
2674{ 2703{
2675 struct device *dev = dev_from_gk20a(g); 2704 if (inst_block->cpu_va)
2676 2705 gk20a_gmmu_free(g, inst_block);
2677 if (inst_block->cpuva) {
2678 dma_free_coherent(dev, inst_block->size,
2679 inst_block->cpuva, inst_block->iova);
2680 }
2681
2682 memset(inst_block, 0, sizeof(*inst_block));
2683} 2706}
2684 2707
2685static int gk20a_init_bar1_vm(struct mm_gk20a *mm) 2708static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
@@ -2687,7 +2710,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2687 int err; 2710 int err;
2688 struct vm_gk20a *vm = &mm->bar1.vm; 2711 struct vm_gk20a *vm = &mm->bar1.vm;
2689 struct gk20a *g = gk20a_from_mm(mm); 2712 struct gk20a *g = gk20a_from_mm(mm);
2690 struct inst_desc *inst_block = &mm->bar1.inst_block; 2713 struct mem_desc *inst_block = &mm->bar1.inst_block;
2691 u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; 2714 u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
2692 2715
2693 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; 2716 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
@@ -2713,7 +2736,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
2713 int err; 2736 int err;
2714 struct vm_gk20a *vm = &mm->pmu.vm; 2737 struct vm_gk20a *vm = &mm->pmu.vm;
2715 struct gk20a *g = gk20a_from_mm(mm); 2738 struct gk20a *g = gk20a_from_mm(mm);
2716 struct inst_desc *inst_block = &mm->pmu.inst_block; 2739 struct mem_desc *inst_block = &mm->pmu.inst_block;
2717 u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; 2740 u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
2718 2741
2719 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; 2742 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
@@ -2739,7 +2762,7 @@ static int gk20a_init_hwpm(struct mm_gk20a *mm)
2739 int err; 2762 int err;
2740 struct vm_gk20a *vm = &mm->pmu.vm; 2763 struct vm_gk20a *vm = &mm->pmu.vm;
2741 struct gk20a *g = gk20a_from_mm(mm); 2764 struct gk20a *g = gk20a_from_mm(mm);
2742 struct inst_desc *inst_block = &mm->hwpm.inst_block; 2765 struct mem_desc *inst_block = &mm->hwpm.inst_block;
2743 2766
2744 err = gk20a_alloc_inst_block(g, inst_block); 2767 err = gk20a_alloc_inst_block(g, inst_block);
2745 if (err) 2768 if (err)
@@ -2763,13 +2786,13 @@ void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
2763 ram_in_page_dir_base_hi_f(pdb_addr_hi)); 2786 ram_in_page_dir_base_hi_f(pdb_addr_hi));
2764} 2787}
2765 2788
2766void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, 2789void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm,
2767 u32 big_page_size) 2790 u32 big_page_size)
2768{ 2791{
2769 struct gk20a *g = gk20a_from_vm(vm); 2792 struct gk20a *g = gk20a_from_vm(vm);
2770 u64 pde_addr = gk20a_mm_iova_addr(g, vm->pdb.sgt->sgl); 2793 u64 pde_addr = gk20a_mm_iova_addr(g, vm->pdb.sgt->sgl);
2771 phys_addr_t inst_pa = inst_block->cpu_pa; 2794 phys_addr_t inst_pa = gk20a_mem_phys(inst_block);
2772 void *inst_ptr = inst_block->cpuva; 2795 void *inst_ptr = inst_block->cpu_va;
2773 2796
2774 gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", 2797 gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
2775 (u64)inst_pa, inst_ptr); 2798 (u64)inst_pa, inst_ptr);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 54028e73..ca7fef01 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -46,7 +46,7 @@
46 46
47struct mem_desc { 47struct mem_desc {
48 void *cpu_va; 48 void *cpu_va;
49 dma_addr_t iova; 49 struct sg_table *sgt;
50 size_t size; 50 size_t size;
51 u64 gpu_va; 51 u64 gpu_va;
52}; 52};
@@ -70,40 +70,6 @@ struct gpfifo_desc {
70 u64 gpu_va; 70 u64 gpu_va;
71}; 71};
72 72
73struct mmu_desc {
74 void *cpuva;
75 u64 iova;
76 size_t size;
77};
78
79struct inst_desc {
80 u64 iova;
81 void *cpuva;
82 phys_addr_t cpu_pa;
83 size_t size;
84};
85
86struct surface_mem_desc {
87 u64 iova;
88 void *cpuva;
89 struct sg_table *sgt;
90 size_t size;
91};
92
93struct userd_desc {
94 struct sg_table *sgt;
95 u64 iova;
96 void *cpuva;
97 size_t size;
98 u64 gpu_va;
99};
100
101struct runlist_mem_desc {
102 u64 iova;
103 void *cpuva;
104 size_t size;
105};
106
107struct patch_desc { 73struct patch_desc {
108 struct page **pages; 74 struct page **pages;
109 u64 iova; 75 u64 iova;
@@ -113,13 +79,6 @@ struct patch_desc {
113 u32 data_count; 79 u32 data_count;
114}; 80};
115 81
116struct pmu_mem_desc {
117 void *cpuva;
118 u64 iova;
119 u64 pmu_va;
120 size_t size;
121};
122
123struct priv_cmd_queue_mem_desc { 82struct priv_cmd_queue_mem_desc {
124 u64 base_iova; 83 u64 base_iova;
125 u32 *base_cpuva; 84 u32 *base_cpuva;
@@ -336,24 +295,24 @@ struct mm_gk20a {
336 struct { 295 struct {
337 u32 aperture_size; 296 u32 aperture_size;
338 struct vm_gk20a vm; 297 struct vm_gk20a vm;
339 struct inst_desc inst_block; 298 struct mem_desc inst_block;
340 } bar1; 299 } bar1;
341 300
342 struct { 301 struct {
343 u32 aperture_size; 302 u32 aperture_size;
344 struct vm_gk20a vm; 303 struct vm_gk20a vm;
345 struct inst_desc inst_block; 304 struct mem_desc inst_block;
346 } bar2; 305 } bar2;
347 306
348 struct { 307 struct {
349 u32 aperture_size; 308 u32 aperture_size;
350 struct vm_gk20a vm; 309 struct vm_gk20a vm;
351 struct inst_desc inst_block; 310 struct mem_desc inst_block;
352 } pmu; 311 } pmu;
353 312
354 struct { 313 struct {
355 /* using pmu vm currently */ 314 /* using pmu vm currently */
356 struct inst_desc inst_block; 315 struct mem_desc inst_block;
357 } hwpm; 316 } hwpm;
358 317
359 318
@@ -406,9 +365,9 @@ static inline int max_vaddr_bits_gk20a(void)
406#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v() 365#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v()
407#endif 366#endif
408 367
409int gk20a_alloc_inst_block(struct gk20a *g, struct inst_desc *inst_block); 368int gk20a_alloc_inst_block(struct gk20a *g, struct mem_desc *inst_block);
410void gk20a_free_inst_block(struct gk20a *g, struct inst_desc *inst_block); 369void gk20a_free_inst_block(struct gk20a *g, struct mem_desc *inst_block);
411void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, 370void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm,
412 u32 big_page_size); 371 u32 big_page_size);
413 372
414void gk20a_mm_dump_vm(struct vm_gk20a *vm, 373void gk20a_mm_dump_vm(struct vm_gk20a *vm,
@@ -448,9 +407,31 @@ int gk20a_gmmu_alloc_map(struct vm_gk20a *vm,
448 size_t size, 407 size_t size,
449 struct mem_desc *mem); 408 struct mem_desc *mem);
450 409
410int gk20a_gmmu_alloc_map_attr(struct vm_gk20a *vm,
411 enum dma_attr attr,
412 size_t size,
413 struct mem_desc *mem);
414
451void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, 415void gk20a_gmmu_unmap_free(struct vm_gk20a *vm,
452 struct mem_desc *mem); 416 struct mem_desc *mem);
453 417
418int gk20a_gmmu_alloc(struct gk20a *g,
419 size_t size,
420 struct mem_desc *mem);
421
422int gk20a_gmmu_alloc_attr(struct gk20a *g,
423 enum dma_attr attr,
424 size_t size,
425 struct mem_desc *mem);
426
427void gk20a_gmmu_free(struct gk20a *g,
428 struct mem_desc *mem);
429
430static inline phys_addr_t gk20a_mem_phys(struct mem_desc *mem)
431{
432 return sg_phys(mem->sgt->sgl);
433}
434
454u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, 435u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
455 u64 map_offset, 436 u64 map_offset,
456 struct sg_table *sgt, 437 struct sg_table *sgt,
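
The header diff above is the core of the change: struct mem_desc now carries the sg_table next to cpu_va, size and gpu_va, and the gk20a_gmmu_alloc*/map/free helpers all take the descriptor directly. A minimal sketch based on those prototypes; the function name and the 4 KiB size are illustrative only:

	/*
	 * Sketch only: allocate a GPU-mapped buffer into a mem_desc, touch it
	 * from the CPU, then release it with the matching helper.
	 */
	static int example_alloc_scratch(struct vm_gk20a *vm)
	{
		struct mem_desc scratch = {0};
		int err;

		err = gk20a_gmmu_alloc_map(vm, 4096, &scratch);
		if (err)
			return err;

		/* CPU access goes through cpu_va; the GPU sees the buffer at gpu_va */
		gk20a_mem_wr32(scratch.cpu_va, 0, 0xcafe);

		gk20a_gmmu_unmap_free(vm, &scratch);
		return 0;
	}

When a physical address is needed, for example for instance-block pointers, gk20a_mem_phys() returns the address of the descriptor's first scatterlist entry.
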
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index f2430165..95bb1eb6 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -146,7 +146,7 @@ static void set_pmu_cmdline_args_falctracesize_v2(
146 146
147static void set_pmu_cmdline_args_falctracedmabase_v2(struct pmu_gk20a *pmu) 147static void set_pmu_cmdline_args_falctracedmabase_v2(struct pmu_gk20a *pmu)
148{ 148{
149 pmu->args_v2.falc_trace_dma_base = ((u32)pmu->trace_buf.pmu_va)/0x100; 149 pmu->args_v2.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100;
150} 150}
151 151
152static void set_pmu_cmdline_args_falctracedmaidx_v2( 152static void set_pmu_cmdline_args_falctracedmaidx_v2(
@@ -177,7 +177,7 @@ static void set_pmu_cmdline_args_falctracesize_v3(
177 177
178static void set_pmu_cmdline_args_falctracedmabase_v3(struct pmu_gk20a *pmu) 178static void set_pmu_cmdline_args_falctracedmabase_v3(struct pmu_gk20a *pmu)
179{ 179{
180 pmu->args_v3.falc_trace_dma_base = ((u32)pmu->trace_buf.pmu_va)/0x100; 180 pmu->args_v3.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100;
181} 181}
182 182
183static void set_pmu_cmdline_args_falctracedmaidx_v3( 183static void set_pmu_cmdline_args_falctracedmaidx_v3(
@@ -218,9 +218,9 @@ static bool find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos)
218static void printtrace(struct pmu_gk20a *pmu) 218static void printtrace(struct pmu_gk20a *pmu)
219{ 219{
220 u32 i = 0, j = 0, k, l, m, count; 220 u32 i = 0, j = 0, k, l, m, count;
221 char *trace = pmu->trace_buf.cpuva; 221 char *trace = pmu->trace_buf.cpu_va;
222 char part_str[40], buf[0x40]; 222 char part_str[40], buf[0x40];
223 u32 *trace1 = pmu->trace_buf.cpuva; 223 u32 *trace1 = pmu->trace_buf.cpu_va;
224 struct gk20a *g = gk20a_from_pmu(pmu); 224 struct gk20a *g = gk20a_from_pmu(pmu);
225 gk20a_err(dev_from_gk20a(g), "Dump pmutrace"); 225 gk20a_err(dev_from_gk20a(g), "Dump pmutrace");
226 for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { 226 for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
@@ -249,7 +249,7 @@ static void printtrace(struct pmu_gk20a *pmu)
249 249
250static void set_pmu_cmdline_args_falctracedmabase_v1(struct pmu_gk20a *pmu) 250static void set_pmu_cmdline_args_falctracedmabase_v1(struct pmu_gk20a *pmu)
251{ 251{
252 pmu->args_v1.falc_trace_dma_base = ((u32)pmu->trace_buf.pmu_va)/0x100; 252 pmu->args_v1.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100;
253} 253}
254 254
255static void set_pmu_cmdline_args_falctracedmaidx_v1( 255static void set_pmu_cmdline_args_falctracedmaidx_v1(
@@ -1349,7 +1349,7 @@ static int pmu_bootstrap(struct pmu_gk20a *pmu)
1349 pwr_falcon_itfen_ctxen_enable_f()); 1349 pwr_falcon_itfen_ctxen_enable_f());
1350 gk20a_writel(g, pwr_pmu_new_instblk_r(), 1350 gk20a_writel(g, pwr_pmu_new_instblk_r(),
1351 pwr_pmu_new_instblk_ptr_f( 1351 pwr_pmu_new_instblk_ptr_f(
1352 mm->pmu.inst_block.cpu_pa >> 12) | 1352 sg_phys(mm->pmu.inst_block.sgt->sgl) >> 12) |
1353 pwr_pmu_new_instblk_valid_f(1) | 1353 pwr_pmu_new_instblk_valid_f(1) |
1354 pwr_pmu_new_instblk_target_sys_coh_f()); 1354 pwr_pmu_new_instblk_target_sys_coh_f());
1355 1355
@@ -1377,13 +1377,13 @@ static int pmu_bootstrap(struct pmu_gk20a *pmu)
1377 pwr_falcon_dmemc_blk_f(0) | 1377 pwr_falcon_dmemc_blk_f(0) |
1378 pwr_falcon_dmemc_aincw_f(1)); 1378 pwr_falcon_dmemc_aincw_f(1));
1379 1379
1380 addr_code = u64_lo32((pmu->ucode.pmu_va + 1380 addr_code = u64_lo32((pmu->ucode.gpu_va +
1381 desc->app_start_offset + 1381 desc->app_start_offset +
1382 desc->app_resident_code_offset) >> 8) ; 1382 desc->app_resident_code_offset) >> 8) ;
1383 addr_data = u64_lo32((pmu->ucode.pmu_va + 1383 addr_data = u64_lo32((pmu->ucode.gpu_va +
1384 desc->app_start_offset + 1384 desc->app_start_offset +
1385 desc->app_resident_data_offset) >> 8); 1385 desc->app_resident_data_offset) >> 8);
1386 addr_load = u64_lo32((pmu->ucode.pmu_va + 1386 addr_load = u64_lo32((pmu->ucode.gpu_va +
1387 desc->bootloader_start_offset) >> 8); 1387 desc->bootloader_start_offset) >> 8);
1388 1388
1389 gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE); 1389 gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
@@ -1942,13 +1942,10 @@ static int gk20a_prepare_ucode(struct gk20a *g)
1942{ 1942{
1943 struct pmu_gk20a *pmu = &g->pmu; 1943 struct pmu_gk20a *pmu = &g->pmu;
1944 int i, err = 0; 1944 int i, err = 0;
1945 struct sg_table *sgt_pmu_ucode;
1946 dma_addr_t iova;
1947 struct device *d = dev_from_gk20a(g); 1945 struct device *d = dev_from_gk20a(g);
1948 struct mm_gk20a *mm = &g->mm; 1946 struct mm_gk20a *mm = &g->mm;
1949 struct vm_gk20a *vm = &mm->pmu.vm; 1947 struct vm_gk20a *vm = &mm->pmu.vm;
1950 void *ucode_ptr; 1948 void *ucode_ptr;
1951 DEFINE_DMA_ATTRS(attrs);
1952 1949
1953 if (g->pmu_fw) { 1950 if (g->pmu_fw) {
1954 gk20a_init_pmu(pmu); 1951 gk20a_init_pmu(pmu);
@@ -1967,56 +1964,21 @@ static int gk20a_prepare_ucode(struct gk20a *g)
1967 pmu->ucode_image = (u32 *)((u8 *)pmu->desc + 1964 pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
1968 pmu->desc->descriptor_size); 1965 pmu->desc->descriptor_size);
1969 1966
1970 dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); 1967 err = gk20a_gmmu_alloc_map_attr(vm, DMA_ATTR_READ_ONLY,
1971 pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, 1968 GK20A_PMU_UCODE_SIZE_MAX, &pmu->ucode);
1972 &iova, 1969 if (err)
1973 GFP_KERNEL,
1974 &attrs);
1975 if (!pmu->ucode.cpuva) {
1976 gk20a_err(d, "failed to allocate memory\n");
1977 err = -ENOMEM;
1978 goto err_release_fw; 1970 goto err_release_fw;
1979 }
1980
1981 pmu->ucode.iova = iova;
1982
1983 err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
1984 pmu->ucode.cpuva,
1985 pmu->ucode.iova,
1986 GK20A_PMU_UCODE_SIZE_MAX);
1987 if (err) {
1988 gk20a_err(d, "failed to allocate sg table\n");
1989 goto err_free_pmu_ucode;
1990 }
1991 1971
1992 pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, 1972 ucode_ptr = pmu->ucode.cpu_va;
1993 GK20A_PMU_UCODE_SIZE_MAX,
1994 0, /* flags */
1995 gk20a_mem_flag_read_only);
1996 if (!pmu->ucode.pmu_va) {
1997 gk20a_err(d, "failed to map pmu ucode memory!!");
1998 goto err_free_ucode_sgt;
1999 }
2000
2001 ucode_ptr = pmu->ucode.cpuva;
2002 1973
2003 for (i = 0; i < (pmu->desc->app_start_offset + 1974 for (i = 0; i < (pmu->desc->app_start_offset +
2004 pmu->desc->app_size) >> 2; i++) 1975 pmu->desc->app_size) >> 2; i++)
2005 gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]); 1976 gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);
2006 1977
2007 gk20a_free_sgtable(&sgt_pmu_ucode);
2008
2009 gk20a_init_pmu(pmu); 1978 gk20a_init_pmu(pmu);
2010 1979
2011 return 0; 1980 return 0;
2012 1981
2013 err_free_ucode_sgt:
2014 gk20a_free_sgtable(&sgt_pmu_ucode);
2015 err_free_pmu_ucode:
2016 dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
2017 pmu->ucode.cpuva, pmu->ucode.iova, &attrs);
2018 pmu->ucode.cpuva = NULL;
2019 pmu->ucode.iova = 0;
2020 err_release_fw: 1982 err_release_fw:
2021 release_firmware(g->pmu_fw); 1983 release_firmware(g->pmu_fw);
2022 1984
@@ -2031,9 +1993,6 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g)
2031 struct device *d = dev_from_gk20a(g); 1993 struct device *d = dev_from_gk20a(g);
2032 int i, err = 0; 1994 int i, err = 0;
2033 u8 *ptr; 1995 u8 *ptr;
2034 struct sg_table *sgt_seq_buf;
2035 struct sg_table *sgt_pmu_buf;
2036 dma_addr_t iova;
2037 1996
2038 gk20a_dbg_fn(""); 1997 gk20a_dbg_fn("");
2039 1998
@@ -2082,70 +2041,19 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g)
2082 2041
2083 INIT_WORK(&pmu->pg_init, pmu_setup_hw); 2042 INIT_WORK(&pmu->pg_init, pmu_setup_hw);
2084 2043
2085 pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, 2044 err = gk20a_gmmu_alloc_map(vm, GK20A_PMU_SEQ_BUF_SIZE, &pmu->seq_buf);
2086 &iova, 2045 if (err) {
2087 GFP_KERNEL);
2088 if (!pmu->seq_buf.cpuva) {
2089 gk20a_err(d, "failed to allocate memory\n"); 2046 gk20a_err(d, "failed to allocate memory\n");
2090 err = -ENOMEM;
2091 goto err_free_seq; 2047 goto err_free_seq;
2092 } 2048 }
2093 2049
2094 pmu->seq_buf.iova = iova; 2050 err = gk20a_gmmu_alloc_map(vm, GK20A_PMU_TRACE_BUFSIZE, &pmu->trace_buf);
2095 2051 if (err) {
2096 pmu->trace_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_TRACE_BUFSIZE,
2097 &iova,
2098 GFP_KERNEL);
2099 if (!pmu->trace_buf.cpuva) {
2100 gk20a_err(d, "failed to allocate trace memory\n"); 2052 gk20a_err(d, "failed to allocate trace memory\n");
2101 err = -ENOMEM;
2102 goto err_free_seq_buf; 2053 goto err_free_seq_buf;
2103 } 2054 }
2104 pmu->trace_buf.iova = iova;
2105 2055
2106 err = gk20a_get_sgtable(d, &sgt_seq_buf, 2056 ptr = (u8 *)pmu->seq_buf.cpu_va;
2107 pmu->seq_buf.cpuva,
2108 pmu->seq_buf.iova,
2109 GK20A_PMU_SEQ_BUF_SIZE);
2110 if (err) {
2111 gk20a_err(d, "failed to allocate seq buf sg table\n");
2112 goto err_free_trace_buf;
2113 }
2114
2115 pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
2116 GK20A_PMU_SEQ_BUF_SIZE,
2117 0, /* flags */
2118 gk20a_mem_flag_none);
2119 if (!pmu->seq_buf.pmu_va) {
2120 gk20a_err(d, "failed to gmmu map seq buf memory!!");
2121 err = -ENOMEM;
2122 goto err_free_seq_buf_sgt;
2123 }
2124
2125 err = gk20a_get_sgtable(d, &sgt_pmu_buf,
2126 pmu->trace_buf.cpuva,
2127 pmu->trace_buf.iova,
2128 GK20A_PMU_TRACE_BUFSIZE);
2129 if (err) {
2130 gk20a_err(d, "failed to allocate sg table for Trace\n");
2131 goto err_unmap_seq_buf;
2132 }
2133
2134 pmu->trace_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_buf,
2135 GK20A_PMU_TRACE_BUFSIZE,
2136 0, /* flags */
2137 gk20a_mem_flag_none);
2138 if (!pmu->trace_buf.pmu_va) {
2139 gk20a_err(d, "failed to gmmu map pmu trace memory!!");
2140 err = -ENOMEM;
2141 goto err_free_trace_buf_sgt;
2142 }
2143
2144 ptr = (u8 *)pmu->seq_buf.cpuva;
2145 if (!ptr) {
2146 gk20a_err(d, "failed to map cpu ptr for zbc buffer");
2147 goto err_unmap_trace_buf;
2148 }
2149 2057
2150 /* TBD: remove this if ZBC save/restore is handled by PMU 2058 /* TBD: remove this if ZBC save/restore is handled by PMU
2151 * end an empty ZBC sequence for now */ 2059 * end an empty ZBC sequence for now */
@@ -2155,35 +2063,13 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g)
2155 2063
2156 pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; 2064 pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
2157 2065
2158 gk20a_free_sgtable(&sgt_seq_buf);
2159 gk20a_free_sgtable(&sgt_pmu_buf);
2160
2161 pmu->sw_ready = true; 2066 pmu->sw_ready = true;
2162 2067
2163skip_init: 2068skip_init:
2164 gk20a_dbg_fn("done"); 2069 gk20a_dbg_fn("done");
2165 return 0; 2070 return 0;
2166 err_unmap_trace_buf:
2167 gk20a_gmmu_unmap(vm, pmu->trace_buf.pmu_va,
2168 GK20A_PMU_TRACE_BUFSIZE, gk20a_mem_flag_none);
2169 err_free_trace_buf_sgt:
2170 gk20a_free_sgtable(&sgt_pmu_buf);
2171 err_unmap_seq_buf:
2172 gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
2173 GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
2174 err_free_seq_buf_sgt:
2175 gk20a_free_sgtable(&sgt_seq_buf);
2176 err_free_trace_buf:
2177 dma_free_coherent(d, GK20A_PMU_TRACE_BUFSIZE,
2178 pmu->trace_buf.cpuva, pmu->trace_buf.iova);
2179 pmu->trace_buf.cpuva = NULL;
2180 pmu->trace_buf.iova = 0;
2181
2182 err_free_seq_buf: 2071 err_free_seq_buf:
2183 dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, 2072 gk20a_gmmu_unmap_free(vm, &pmu->seq_buf);
2184 pmu->seq_buf.cpuva, pmu->seq_buf.iova);
2185 pmu->seq_buf.cpuva = NULL;
2186 pmu->seq_buf.iova = 0;
2187 err_free_seq: 2073 err_free_seq:
2188 kfree(pmu->seq); 2074 kfree(pmu->seq);
2189 err_free_mutex: 2075 err_free_mutex:
@@ -2306,8 +2192,8 @@ int gk20a_init_pmu_bind_fecs(struct gk20a *g)
2306 cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A; 2192 cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
2307 cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS; 2193 cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS;
2308 cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size; 2194 cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size;
2309 cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8); 2195 cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.gpu_va >> 8);
2310 cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF); 2196 cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.gpu_va & 0xFF);
2311 cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT; 2197 cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
2312 2198
2313 pmu->buf_loaded = false; 2199 pmu->buf_loaded = false;
@@ -2331,8 +2217,8 @@ static void pmu_setup_hw_load_zbc(struct gk20a *g)
2331 cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A; 2217 cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A;
2332 cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC; 2218 cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC;
2333 cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size; 2219 cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size;
2334 cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8); 2220 cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.gpu_va >> 8);
2335 cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF); 2221 cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.gpu_va & 0xFF);
2336 cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT; 2222 cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
2337 2223
2338 pmu->buf_loaded = false; 2224 pmu->buf_loaded = false;
@@ -4100,9 +3986,9 @@ static int falc_trace_show(struct seq_file *s, void *data)
4100 struct gk20a *g = s->private; 3986 struct gk20a *g = s->private;
4101 struct pmu_gk20a *pmu = &g->pmu; 3987 struct pmu_gk20a *pmu = &g->pmu;
4102 u32 i = 0, j = 0, k, l, m; 3988 u32 i = 0, j = 0, k, l, m;
4103 char *trace = pmu->trace_buf.cpuva; 3989 char *trace = pmu->trace_buf.cpu_va;
4104 char part_str[40]; 3990 char part_str[40];
4105 u32 *trace1 = pmu->trace_buf.cpuva; 3991 u32 *trace1 = pmu->trace_buf.cpu_va;
4106 for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { 3992 for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
4107 for (j = 0; j < 0x40; j++) 3993 for (j = 0; j < 0x40; j++)
4108 if (trace1[(i / 4) + j]) 3994 if (trace1[(i / 4) + j])
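
Every PMU hunk above follows the same pattern: the open-coded dma_alloc_coherent() + gk20a_get_sgtable() + gk20a_gmmu_map() sequence collapses into one gk20a_gmmu_alloc_map() call, and the old pmu_va field becomes the descriptor's gpu_va. A condensed sketch of the resulting seq/trace buffer setup; the wrapper function is illustrative, and error messages and the ZBC seeding are omitted:

	/*
	 * Sketch only: allocate and map the PMU sequence and trace buffers,
	 * unwinding the first allocation if the second fails.
	 */
	static int example_setup_pmu_buffers(struct gk20a *g, struct pmu_gk20a *pmu)
	{
		struct vm_gk20a *vm = &g->mm.pmu.vm;
		int err;

		err = gk20a_gmmu_alloc_map(vm, GK20A_PMU_SEQ_BUF_SIZE, &pmu->seq_buf);
		if (err)
			return err;

		err = gk20a_gmmu_alloc_map(vm, GK20A_PMU_TRACE_BUFSIZE, &pmu->trace_buf);
		if (err) {
			gk20a_gmmu_unmap_free(vm, &pmu->seq_buf);
			return err;
		}

		/* mirrors set_pmu_cmdline_args_falctracedmabase_v1(): the falcon DMA
		 * base is derived from the GPU VA, in 256-byte units */
		pmu->args_v1.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va) / 0x100;
		return 0;
	}
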
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index e4865180..6cd173e8 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -1124,12 +1124,12 @@ struct pmu_pg_stats {
1124struct pmu_gk20a { 1124struct pmu_gk20a {
1125 1125
1126 struct pmu_ucode_desc *desc; 1126 struct pmu_ucode_desc *desc;
1127 struct pmu_mem_desc ucode; 1127 struct mem_desc ucode;
1128 1128
1129 struct pmu_mem_desc pg_buf; 1129 struct mem_desc pg_buf;
1130 /* TBD: remove this if ZBC seq is fixed */ 1130 /* TBD: remove this if ZBC seq is fixed */
1131 struct pmu_mem_desc seq_buf; 1131 struct mem_desc seq_buf;
1132 struct pmu_mem_desc trace_buf; 1132 struct mem_desc trace_buf;
1133 bool buf_loaded; 1133 bool buf_loaded;
1134 1134
1135 struct pmu_sha1_gid gid_info; 1135 struct pmu_sha1_gid gid_info;
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index a58f726a..ecb0f8ab 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -194,7 +194,7 @@ int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
194 g->ctxsw_ucode_info.fecs.code.offset; 194 g->ctxsw_ucode_info.fecs.code.offset;
195 p_img->desc->app_resident_data_size = 195 p_img->desc->app_resident_data_size =
196 g->ctxsw_ucode_info.fecs.data.size; 196 g->ctxsw_ucode_info.fecs.data.size;
197 p_img->data = g->ctxsw_ucode_info.surface_desc.cpuva; 197 p_img->data = g->ctxsw_ucode_info.surface_desc.cpu_va;
198 p_img->data_size = p_img->desc->image_size; 198 p_img->data_size = p_img->desc->image_size;
199 199
200 p_img->fw_ver = NULL; 200 p_img->fw_ver = NULL;
@@ -874,11 +874,8 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g)
874{ 874{
875 struct mm_gk20a *mm = &g->mm; 875 struct mm_gk20a *mm = &g->mm;
876 struct vm_gk20a *vm = &mm->pmu.vm; 876 struct vm_gk20a *vm = &mm->pmu.vm;
877 struct device *d = dev_from_gk20a(g);
878 int i, err = 0; 877 int i, err = 0;
879 struct sg_table *sgt_pmu_ucode = NULL; 878 u64 *acr_dmem;
880 dma_addr_t iova;
881 u64 *pacr_ucode_cpuva = NULL, pacr_ucode_pmu_va = 0, *acr_dmem;
882 u32 img_size_in_bytes = 0; 879 u32 img_size_in_bytes = 0;
883 u32 status, size; 880 u32 status, size;
884 u64 start; 881 u64 start;
@@ -924,36 +921,18 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g)
924 err = -1; 921 err = -1;
925 goto err_release_acr_fw; 922 goto err_release_acr_fw;
926 } 923 }
927 pacr_ucode_cpuva = dma_alloc_coherent(d, img_size_in_bytes, 924 err = gk20a_gmmu_alloc_map(vm, img_size_in_bytes,
928 &iova, GFP_KERNEL); 925 &acr->acr_ucode);
929 if (!pacr_ucode_cpuva) { 926 if (err) {
930 err = -ENOMEM; 927 err = -ENOMEM;
931 goto err_release_acr_fw; 928 goto err_release_acr_fw;
932 } 929 }
933 930
934 err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
935 pacr_ucode_cpuva,
936 iova,
937 img_size_in_bytes);
938 if (err) {
939 gk20a_err(d, "failed to allocate sg table\n");
940 err = -ENOMEM;
941 goto err_free_acr_buf;
942 }
943 pacr_ucode_pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
944 img_size_in_bytes,
945 0, /* flags */
946 gk20a_mem_flag_read_only);
947 if (!pacr_ucode_pmu_va) {
948 gk20a_err(d, "failed to map pmu ucode memory!!");
949 err = -ENOMEM;
950 goto err_free_ucode_sgt;
951 }
952 acr_dmem = (u64 *) 931 acr_dmem = (u64 *)
953 &(((u8 *)acr_ucode_data_t210_load)[ 932 &(((u8 *)acr_ucode_data_t210_load)[
954 acr_ucode_header_t210_load[2]]); 933 acr_ucode_header_t210_load[2]]);
955 acr->acr_dmem_desc = (struct flcn_acr_desc *)((u8 *)( 934 acr->acr_dmem_desc = (struct flcn_acr_desc *)((u8 *)(
956 pacr_ucode_cpuva) + acr_ucode_header_t210_load[2]); 935 acr->acr_ucode.cpu_va) + acr_ucode_header_t210_load[2]);
957 ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start = 936 ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start =
958 start; 937 start;
959 ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size = 938 ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size =
@@ -962,13 +941,9 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g)
962 ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0; 941 ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0;
963 942
964 for (i = 0; i < (img_size_in_bytes/4); i++) { 943 for (i = 0; i < (img_size_in_bytes/4); i++) {
965 gk20a_mem_wr32(pacr_ucode_cpuva, i, 944 gk20a_mem_wr32(acr->acr_ucode.cpu_va, i,
966 acr_ucode_data_t210_load[i]); 945 acr_ucode_data_t210_load[i]);
967 } 946 }
968 acr->acr_ucode.cpuva = pacr_ucode_cpuva;
969 acr->acr_ucode.iova = iova;
970 acr->acr_ucode.pmu_va = pacr_ucode_pmu_va;
971 acr->acr_ucode.size = img_size_in_bytes;
972 /* 947 /*
973 * In order to execute this binary, we will be using 948 * In order to execute this binary, we will be using
974 * a bootloader which will load this image into PMU IMEM/DMEM. 949 * a bootloader which will load this image into PMU IMEM/DMEM.
@@ -983,7 +958,7 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g)
983 bl_dmem_desc->signature[3] = 0; 958 bl_dmem_desc->signature[3] = 0;
984 bl_dmem_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT; 959 bl_dmem_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT;
985 bl_dmem_desc->code_dma_base = 960 bl_dmem_desc->code_dma_base =
986 (unsigned int)(((u64)pacr_ucode_pmu_va >> 8)); 961 (unsigned int)(((u64)acr->acr_ucode.gpu_va >> 8));
987 bl_dmem_desc->non_sec_code_off = acr_ucode_header_t210_load[0]; 962 bl_dmem_desc->non_sec_code_off = acr_ucode_header_t210_load[0];
988 bl_dmem_desc->non_sec_code_size = acr_ucode_header_t210_load[1]; 963 bl_dmem_desc->non_sec_code_size = acr_ucode_header_t210_load[1];
989 bl_dmem_desc->sec_code_off = acr_ucode_header_t210_load[5]; 964 bl_dmem_desc->sec_code_off = acr_ucode_header_t210_load[5];
@@ -993,8 +968,6 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g)
993 bl_dmem_desc->code_dma_base + 968 bl_dmem_desc->code_dma_base +
994 ((acr_ucode_header_t210_load[2]) >> 8); 969 ((acr_ucode_header_t210_load[2]) >> 8);
995 bl_dmem_desc->data_size = acr_ucode_header_t210_load[3]; 970 bl_dmem_desc->data_size = acr_ucode_header_t210_load[3];
996 gk20a_free_sgtable(&sgt_pmu_ucode);
997 sgt_pmu_ucode = NULL;
998 } else 971 } else
999 acr->acr_dmem_desc->nonwpr_ucode_blob_size = 0; 972 acr->acr_dmem_desc->nonwpr_ucode_blob_size = 0;
1000 status = pmu_exec_gen_bl(g, bl_dmem_desc, 1); 973 status = pmu_exec_gen_bl(g, bl_dmem_desc, 1);
@@ -1004,17 +977,7 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g)
1004 } 977 }
1005 return 0; 978 return 0;
1006err_free_ucode_map: 979err_free_ucode_map:
1007 gk20a_gmmu_unmap(vm, pacr_ucode_pmu_va, 980 gk20a_gmmu_unmap_free(vm, &acr->acr_ucode);
1008 img_size_in_bytes, gk20a_mem_flag_none);
1009 acr->acr_ucode.pmu_va = 0;
1010err_free_ucode_sgt:
1011 if (sgt_pmu_ucode)
1012 gk20a_free_sgtable(&sgt_pmu_ucode);
1013err_free_acr_buf:
1014 dma_free_coherent(d, img_size_in_bytes,
1015 pacr_ucode_cpuva, iova);
1016 acr->acr_ucode.cpuva = NULL;
1017 acr->acr_ucode.iova = 0;
1018err_release_acr_fw: 981err_release_acr_fw:
1019 release_firmware(acr_fw); 982 release_firmware(acr_fw);
1020 acr->acr_fw = NULL; 983 acr->acr_fw = NULL;
@@ -1078,7 +1041,7 @@ static int bl_bootstrap(struct pmu_gk20a *pmu,
1078 pwr_falcon_itfen_ctxen_enable_f()); 1041 pwr_falcon_itfen_ctxen_enable_f());
1079 gk20a_writel(g, pwr_pmu_new_instblk_r(), 1042 gk20a_writel(g, pwr_pmu_new_instblk_r(),
1080 pwr_pmu_new_instblk_ptr_f( 1043 pwr_pmu_new_instblk_ptr_f(
1081 mm->pmu.inst_block.cpu_pa >> 12) | 1044 sg_phys(mm->pmu.inst_block.sgt->sgl) >> 12) |
1082 pwr_pmu_new_instblk_valid_f(1) | 1045 pwr_pmu_new_instblk_valid_f(1) |
1083 pwr_pmu_new_instblk_target_sys_coh_f()); 1046 pwr_pmu_new_instblk_target_sys_coh_f());
1084 1047
@@ -1104,7 +1067,7 @@ static int bl_bootstrap(struct pmu_gk20a *pmu,
1104 pwr_falcon_imemc_aincw_f(1)); 1067 pwr_falcon_imemc_aincw_f(1));
1105 virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8; 1068 virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8;
1106 tag = virt_addr >> 8; /* tag is always 256B aligned */ 1069 tag = virt_addr >> 8; /* tag is always 256B aligned */
1107 bl_ucode = (u32 *)(acr->hsbl_ucode.cpuva); 1070 bl_ucode = (u32 *)(acr->hsbl_ucode.cpu_va);
1108 for (index = 0; index < bl_sz/4; index++) { 1071 for (index = 0; index < bl_sz/4; index++) {
1109 if ((index % 64) == 0) { 1072 if ((index % 64) == 0) {
1110 gk20a_writel(g, pwr_falcon_imemt_r(0), 1073 gk20a_writel(g, pwr_falcon_imemt_r(0),
@@ -1198,16 +1161,11 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1198 struct vm_gk20a *vm = &mm->pmu.vm; 1161 struct vm_gk20a *vm = &mm->pmu.vm;
1199 struct device *d = dev_from_gk20a(g); 1162 struct device *d = dev_from_gk20a(g);
1200 int i, err = 0; 1163 int i, err = 0;
1201 struct sg_table *sgt_pmu_ucode = NULL;
1202 dma_addr_t iova;
1203 u32 bl_sz; 1164 u32 bl_sz;
1204 void *bl_cpuva;
1205 u64 bl_pmu_va;
1206 struct acr_gm20b *acr = &g->acr; 1165 struct acr_gm20b *acr = &g->acr;
1207 const struct firmware *hsbl_fw = acr->hsbl_fw; 1166 const struct firmware *hsbl_fw = acr->hsbl_fw;
1208 struct hsflcn_bl_desc *pmu_bl_gm10x_desc; 1167 struct hsflcn_bl_desc *pmu_bl_gm10x_desc;
1209 u32 *pmu_bl_gm10x = NULL; 1168 u32 *pmu_bl_gm10x = NULL;
1210 DEFINE_DMA_ATTRS(attrs);
1211 gm20b_dbg_pmu(""); 1169 gm20b_dbg_pmu("");
1212 1170
1213 if (!hsbl_fw) { 1171 if (!hsbl_fw) {
@@ -1232,44 +1190,25 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1232 /*TODO in code verify that enable PMU is done, 1190 /*TODO in code verify that enable PMU is done,
1233 scrubbing etc is done*/ 1191 scrubbing etc is done*/
1234 /*TODO in code verify that gmmu vm init is done*/ 1192 /*TODO in code verify that gmmu vm init is done*/
1235 dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); 1193 err = gk20a_gmmu_alloc_attr(g,
1236 bl_cpuva = dma_alloc_attrs(d, bl_sz, 1194 DMA_ATTR_READ_ONLY, bl_sz, &acr->hsbl_ucode);
1237 &iova, 1195 if (err) {
1238 GFP_KERNEL,
1239 &attrs);
1240 gm20b_dbg_pmu("bl size is %x\n", bl_sz);
1241 if (!bl_cpuva) {
1242 gk20a_err(d, "failed to allocate memory\n"); 1196 gk20a_err(d, "failed to allocate memory\n");
1243 err = -ENOMEM;
1244 goto err_done; 1197 goto err_done;
1245 } 1198 }
1246 acr->hsbl_ucode.cpuva = bl_cpuva;
1247 acr->hsbl_ucode.iova = iova;
1248
1249 err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
1250 bl_cpuva,
1251 iova,
1252 bl_sz);
1253 if (err) {
1254 gk20a_err(d, "failed to allocate sg table\n");
1255 goto err_free_cpu_va;
1256 }
1257 1199
1258 bl_pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, 1200 acr->hsbl_ucode.gpu_va = gk20a_gmmu_map(vm, &acr->hsbl_ucode.sgt,
1259 bl_sz, 1201 bl_sz,
1260 0, /* flags */ 1202 0, /* flags */
1261 gk20a_mem_flag_read_only); 1203 gk20a_mem_flag_read_only);
1262 if (!bl_pmu_va) { 1204 if (!acr->hsbl_ucode.gpu_va) {
1263 gk20a_err(d, "failed to map pmu ucode memory!!"); 1205 gk20a_err(d, "failed to map pmu ucode memory!!");
1264 goto err_free_ucode_sgt; 1206 goto err_free_ucode;
1265 } 1207 }
1266 acr->hsbl_ucode.pmu_va = bl_pmu_va;
1267 1208
1268 for (i = 0; i < (bl_sz) >> 2; i++) 1209 for (i = 0; i < (bl_sz) >> 2; i++)
1269 gk20a_mem_wr32(bl_cpuva, i, pmu_bl_gm10x[i]); 1210 gk20a_mem_wr32(acr->hsbl_ucode.cpu_va, i, pmu_bl_gm10x[i]);
1270 gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n"); 1211 gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n");
1271 gk20a_free_sgtable(&sgt_pmu_ucode);
1272 sgt_pmu_ucode = NULL;
1273 } 1212 }
1274 /* 1213 /*
1275 * Disable interrupts to avoid kernel hitting breakpoint due 1214 * Disable interrupts to avoid kernel hitting breakpoint due
@@ -1306,14 +1245,10 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1306 start_gm20b_pmu(g); 1245 start_gm20b_pmu(g);
1307 return 0; 1246 return 0;
1308err_unmap_bl: 1247err_unmap_bl:
1309 gk20a_gmmu_unmap(vm, acr->hsbl_ucode.pmu_va, 1248 gk20a_gmmu_unmap(vm, acr->hsbl_ucode.gpu_va,
1310 acr->hsbl_ucode.size, gk20a_mem_flag_none); 1249 acr->hsbl_ucode.size, gk20a_mem_flag_none);
1311err_free_ucode_sgt: 1250err_free_ucode:
1312 if (sgt_pmu_ucode) 1251 gk20a_gmmu_free(g, &acr->hsbl_ucode);
1313 gk20a_free_sgtable(&sgt_pmu_ucode);
1314err_free_cpu_va:
1315 dma_free_attrs(d, acr->hsbl_ucode.size,
1316 acr->hsbl_ucode.cpuva, acr->hsbl_ucode.iova, &attrs);
1317err_done: 1252err_done:
1318 release_firmware(hsbl_fw); 1253 release_firmware(hsbl_fw);
1319 return err; 1254 return err;
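
The HS bootloader path above keeps allocation and mapping separate because the image must be mapped read-only: gk20a_gmmu_alloc_attr() fills the descriptor, then gk20a_gmmu_map() maps its sg_table with gk20a_mem_flag_read_only. A sketch of that pattern under the same prototypes; the function name is illustrative:

	/*
	 * Sketch only: allocate without mapping, map read-only by hand,
	 * then free on failure. Mirrors the hsbl_ucode handling above.
	 */
	static int example_alloc_map_ro(struct gk20a *g, struct vm_gk20a *vm,
					size_t size, struct mem_desc *mem)
	{
		int err;

		err = gk20a_gmmu_alloc_attr(g, DMA_ATTR_READ_ONLY, size, mem);
		if (err)
			return err;

		mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size,
					     0 /* flags */,
					     gk20a_mem_flag_read_only);
		if (!mem->gpu_va) {
			gk20a_gmmu_free(g, mem);
			return -ENOMEM;
		}
		return 0;
	}
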
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h
index e5d126f8..d26f91ff 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h
@@ -381,9 +381,9 @@ struct acr_gm20b {
381 u32 pmu_args; 381 u32 pmu_args;
382 const struct firmware *acr_fw; 382 const struct firmware *acr_fw;
383 struct flcn_acr_desc *acr_dmem_desc; 383 struct flcn_acr_desc *acr_dmem_desc;
384 struct pmu_mem_desc acr_ucode; 384 struct mem_desc acr_ucode;
385 const struct firmware *hsbl_fw; 385 const struct firmware *hsbl_fw;
386 struct pmu_mem_desc hsbl_ucode; 386 struct mem_desc hsbl_ucode;
387 struct flcn_bl_dmem_desc bl_dmem_desc; 387 struct flcn_bl_dmem_desc bl_dmem_desc;
388 const struct firmware *pmu_fw; 388 const struct firmware *pmu_fw;
389 const struct firmware *pmu_desc; 389 const struct firmware *pmu_desc;
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index 6d186c10..10d2a13e 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -24,7 +24,7 @@ static void channel_gm20b_bind(struct channel_gk20a *ch_gk20a)
24{ 24{
25 struct gk20a *g = ch_gk20a->g; 25 struct gk20a *g = ch_gk20a->g;
26 26
27 u32 inst_ptr = ch_gk20a->inst_block.cpu_pa 27 u32 inst_ptr = sg_phys(ch_gk20a->inst_block.sgt->sgl)
28 >> ram_in_base_shift_v(); 28 >> ram_in_base_shift_v();
29 29
30 gk20a_dbg_info("bind channel %d inst ptr 0x%08x", 30 gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
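
Call sites that need the instance block's physical address, like this bind routine, now read it from the descriptor's sg_table; the gk20a_mem_phys() inline added in mm_gk20a.h wraps exactly that expression. The fragment below, assuming ch_gk20a is in scope as in channel_gm20b_bind(), shows the two equivalent forms for comparison:

	/* as committed in the hunk above */
	u32 inst_ptr = sg_phys(ch_gk20a->inst_block.sgt->sgl)
			>> ram_in_base_shift_v();

	/* equivalent, using the helper added in mm_gk20a.h */
	u32 inst_ptr_alt = gk20a_mem_phys(&ch_gk20a->inst_block)
			>> ram_in_base_shift_v();
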
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index c199964f..5ade9e6c 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -656,7 +656,7 @@ static u32 gr_gm20b_get_tpc_num(u32 addr)
656static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g) 656static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g)
657{ 657{
658 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; 658 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
659 u64 addr_base = ucode_info->ucode_gpuva; 659 u64 addr_base = ucode_info->surface_desc.gpu_va;
660 660
661 gr_gk20a_load_falcon_bind_instblk(g); 661 gr_gk20a_load_falcon_bind_instblk(g);
662 662
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 45d956a2..25e6e4c7 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -196,19 +196,11 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
196 196
197 runlist_size = sizeof(u16) * f->num_channels; 197 runlist_size = sizeof(u16) * f->num_channels;
198 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { 198 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
199 dma_addr_t iova; 199 int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
200 200 if (err) {
201 runlist->mem[i].cpuva =
202 dma_alloc_coherent(d,
203 runlist_size,
204 &iova,
205 GFP_KERNEL);
206 if (!runlist->mem[i].cpuva) {
207 dev_err(d, "memory allocation failed\n"); 201 dev_err(d, "memory allocation failed\n");
208 goto clean_up_runlist; 202 goto clean_up_runlist;
209 } 203 }
210 runlist->mem[i].iova = iova;
211 runlist->mem[i].size = runlist_size;
212 } 204 }
213 mutex_init(&runlist->mutex); 205 mutex_init(&runlist->mutex);
214 206
@@ -220,15 +212,8 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
220 return 0; 212 return 0;
221 213
222clean_up_runlist: 214clean_up_runlist:
223 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { 215 for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
224 if (runlist->mem[i].cpuva) 216 gk20a_gmmu_free(g, &runlist->mem[i]);
225 dma_free_coherent(d,
226 runlist->mem[i].size,
227 runlist->mem[i].cpuva,
228 runlist->mem[i].iova);
229 runlist->mem[i].cpuva = NULL;
230 runlist->mem[i].iova = 0;
231 }
232 217
233 kfree(runlist->active_channels); 218 kfree(runlist->active_channels);
234 runlist->active_channels = NULL; 219 runlist->active_channels = NULL;
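
On the vGPU side the runlist buffers only need CPU-visible, unmapped memory, so init_runlist() switches to the plain gk20a_gmmu_alloc()/gk20a_gmmu_free() pair and drops the per-buffer iova/size bookkeeping. A sketch of the resulting allocate-or-unwind loop; the fifo_runlist_info_gk20a type name is assumed from fifo_gk20a.h and the function name is illustrative:

	/*
	 * Sketch only: allocate all runlist buffers, freeing every slot on
	 * failure as the committed clean_up path does.
	 */
	static int example_alloc_runlist_bufs(struct gk20a *g,
					      struct fifo_runlist_info_gk20a *runlist,
					      size_t runlist_size)
	{
		int i, err = 0;

		for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
			err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
			if (err)
				goto clean_up;
		}
		return 0;

	clean_up:
		for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
			gk20a_gmmu_free(g, &runlist->mem[i]);
		return err;
	}
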
@@ -248,7 +233,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
248 struct fifo_gk20a *f = &g->fifo; 233 struct fifo_gk20a *f = &g->fifo;
249 struct device *d = dev_from_gk20a(g); 234 struct device *d = dev_from_gk20a(g);
250 int chid, err = 0; 235 int chid, err = 0;
251 dma_addr_t iova;
252 236
253 gk20a_dbg_fn(""); 237 gk20a_dbg_fn("");
254 238
@@ -268,28 +252,16 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
268 f->max_engines = ENGINE_INVAL_GK20A; 252 f->max_engines = ENGINE_INVAL_GK20A;
269 253
270 f->userd_entry_size = 1 << ram_userd_base_shift_v(); 254 f->userd_entry_size = 1 << ram_userd_base_shift_v();
271 f->userd_total_size = f->userd_entry_size * f->num_channels;
272 255
273 f->userd.cpuva = dma_alloc_coherent(d, 256 err = gk20a_gmmu_alloc(g, f->userd_entry_size * f->num_channels,
274 f->userd_total_size, 257 &f->userd);
275 &iova,
276 GFP_KERNEL);
277 if (!f->userd.cpuva) {
278 dev_err(d, "memory allocation failed\n");
279 goto clean_up;
280 }
281
282 f->userd.iova = iova;
283 err = gk20a_get_sgtable(d, &f->userd.sgt,
284 f->userd.cpuva, f->userd.iova,
285 f->userd_total_size);
286 if (err) { 258 if (err) {
287 dev_err(d, "failed to create sg table\n"); 259 dev_err(d, "memory allocation failed\n");
288 goto clean_up; 260 goto clean_up;
289 } 261 }
290 262
291 /* bar1 va */ 263 /* bar1 va */
292 f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size); 264 f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd.size);
293 if (!f->userd.gpu_va) { 265 if (!f->userd.gpu_va) {
294 dev_err(d, "gmmu mapping failed\n"); 266 dev_err(d, "gmmu mapping failed\n");
295 goto clean_up; 267 goto clean_up;
@@ -297,8 +269,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
297 269
298 gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); 270 gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
299 271
300 f->userd.size = f->userd_total_size;
301
302 f->channel = kzalloc(f->num_channels * sizeof(*f->channel), 272 f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
303 GFP_KERNEL); 273 GFP_KERNEL);
304 f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info), 274 f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
@@ -315,9 +285,9 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
315 285
316 for (chid = 0; chid < f->num_channels; chid++) { 286 for (chid = 0; chid < f->num_channels; chid++) {
317 f->channel[chid].userd_cpu_va = 287 f->channel[chid].userd_cpu_va =
318 f->userd.cpuva + chid * f->userd_entry_size; 288 f->userd.cpu_va + chid * f->userd_entry_size;
319 f->channel[chid].userd_iova = 289 f->channel[chid].userd_iova =
320 gk20a_mm_smmu_vaddr_translate(g, f->userd.iova) 290 gk20a_mm_iova_addr(g, f->userd.sgt->sgl)
321 + chid * f->userd_entry_size; 291 + chid * f->userd_entry_size;
322 f->channel[chid].userd_gpu_va = 292 f->channel[chid].userd_gpu_va =
323 f->userd.gpu_va + chid * f->userd_entry_size; 293 f->userd.gpu_va + chid * f->userd_entry_size;
@@ -337,17 +307,9 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
337clean_up: 307clean_up:
338 gk20a_dbg_fn("fail"); 308 gk20a_dbg_fn("fail");
339 /* FIXME: unmap from bar1 */ 309 /* FIXME: unmap from bar1 */
340 if (f->userd.sgt) 310 gk20a_gmmu_free(g, &f->userd);
341 gk20a_free_sgtable(&f->userd.sgt); 311
342 if (f->userd.cpuva) 312 memset(&f->userd, 0, sizeof(f->userd));
343 dma_free_coherent(d,
344 f->userd_total_size,
345 f->userd.cpuva,
346 f->userd.iova);
347 f->userd.cpuva = NULL;
348 f->userd.iova = 0;
349
350 memset(&f->userd, 0, sizeof(struct userd_desc));
351 313
352 kfree(f->channel); 314 kfree(f->channel);
353 f->channel = NULL; 315 f->channel = NULL;
@@ -368,7 +330,7 @@ static int vgpu_init_fifo_setup_hw(struct gk20a *g)
368 u32 v, v1 = 0x33, v2 = 0x55; 330 u32 v, v1 = 0x33, v2 = 0x55;
369 331
370 u32 bar1_vaddr = f->userd.gpu_va; 332 u32 bar1_vaddr = f->userd.gpu_va;
371 volatile u32 *cpu_vaddr = f->userd.cpuva; 333 volatile u32 *cpu_vaddr = f->userd.cpu_va;
372 334
373 gk20a_dbg_info("test bar1 @ vaddr 0x%x", 335 gk20a_dbg_info("test bar1 @ vaddr 0x%x",
374 bar1_vaddr); 336 bar1_vaddr);
@@ -505,7 +467,7 @@ static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
505 add /* resume to add all channels back */) { 467 add /* resume to add all channels back */) {
506 u32 chid; 468 u32 chid;
507 469
508 runlist_entry = runlist->mem[0].cpuva; 470 runlist_entry = runlist->mem[0].cpu_va;
509 for_each_set_bit(chid, 471 for_each_set_bit(chid,
510 runlist->active_channels, f->num_channels) { 472 runlist->active_channels, f->num_channels) {
511 gk20a_dbg_info("add channel %d to runlist", chid); 473 gk20a_dbg_info("add channel %d to runlist", chid);
@@ -517,7 +479,7 @@ static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
517 count = 0; 479 count = 0;
518 480
519 return vgpu_submit_runlist(platform->virt_handle, runlist_id, 481 return vgpu_submit_runlist(platform->virt_handle, runlist_id,
520 runlist->mem[0].cpuva, count); 482 runlist->mem[0].cpu_va, count);
521} 483}
522 484
523/* add/remove a channel from runlist 485/* add/remove a channel from runlist