Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 95
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c |  1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h |  6
-rw-r--r--  drivers/gpu/nvgpu/vgpu/gr_vgpu.c   | 21
4 files changed, 37 insertions(+), 86 deletions(-)
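
In short, this change collapses the graphics patch-context's hand-rolled DMA bookkeeping (pages, iova, size, cpu_va, gpu_va) into a single embedded mem_desc, so allocation and teardown can go through the common gmmu helpers instead of open-coded dma_alloc_attrs/sg_table handling. A rough before/after of the descriptor, using stand-in field types rather than the real nvgpu headers:

	/* Stand-in sketch only; field types approximate the nvgpu structures. */
	#include <stddef.h>

	struct page;
	struct sg_table;

	/* Before: patch_desc tracked every piece of the allocation itself. */
	struct patch_desc_before {
		struct page **pages;
		unsigned long long iova;
		size_t size;
		void *cpu_va;
		unsigned long long gpu_va;
		unsigned int data_count;
	};

	/* After: everything except the write counter lives in one mem_desc. */
	struct mem_desc_sketch {
		struct page **pages;
		struct sg_table *sgt;
		void *cpu_va;
		unsigned long long gpu_va;
		size_t size;
	};

	struct patch_desc_after {
		struct mem_desc_sketch mem;
		unsigned int data_count;
	};
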
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 49b70767..293ccd97 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -597,16 +597,16 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 		  struct channel_ctx_gk20a *ch_ctx)
 {
 	/* being defensive still... */
-	if (WARN_ON(ch_ctx->patch_ctx.cpu_va)) {
+	if (WARN_ON(ch_ctx->patch_ctx.mem.cpu_va)) {
 		gk20a_err(dev_from_gk20a(g), "nested ctx patch begin?");
 		return -EBUSY;
 	}
 
-	ch_ctx->patch_ctx.cpu_va = vmap(ch_ctx->patch_ctx.pages,
-			PAGE_ALIGN(ch_ctx->patch_ctx.size) >> PAGE_SHIFT,
+	ch_ctx->patch_ctx.mem.cpu_va = vmap(ch_ctx->patch_ctx.mem.pages,
+			PAGE_ALIGN(ch_ctx->patch_ctx.mem.size) >> PAGE_SHIFT,
 			0, pgprot_writecombine(PAGE_KERNEL));
 
-	if (!ch_ctx->patch_ctx.cpu_va)
+	if (!ch_ctx->patch_ctx.mem.cpu_va)
 		return -ENOMEM;
 
 	return 0;
@@ -616,13 +616,13 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 		  struct channel_ctx_gk20a *ch_ctx)
 {
 	/* being defensive still... */
-	if (!ch_ctx->patch_ctx.cpu_va) {
+	if (!ch_ctx->patch_ctx.mem.cpu_va) {
 		gk20a_err(dev_from_gk20a(g), "dangling ctx patch end?");
 		return -EINVAL;
 	}
 
-	vunmap(ch_ctx->patch_ctx.cpu_va);
-	ch_ctx->patch_ctx.cpu_va = NULL;
+	vunmap(ch_ctx->patch_ctx.mem.cpu_va);
+	ch_ctx->patch_ctx.mem.cpu_va = NULL;
 	return 0;
 }
 
@@ -642,7 +642,7 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
 	/* we added an optimization prolog, epilog
 	 * to get rid of unnecessary maps and l2 invals.
 	 * but be defensive still... */
-	if (!ch_ctx->patch_ctx.cpu_va) {
+	if (!ch_ctx->patch_ctx.mem.cpu_va) {
 		int err;
 		gk20a_err(dev_from_gk20a(g),
 			  "per-write ctx patch begin?");
@@ -654,7 +654,7 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
 	} else
 		mapped_here = false;
 
-	patch_ptr = ch_ctx->patch_ctx.cpu_va;
+	patch_ptr = ch_ctx->patch_ctx.mem.cpu_va;
 	patch_slot = ch_ctx->patch_ctx.data_count * 2;
 
 	gk20a_mem_wr32(patch_ptr, patch_slot++, addr);
@@ -1622,8 +1622,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0);
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0);
 
-	virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
-	virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
+	virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
+	virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
 
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0,
 		 ch_ctx->patch_ctx.data_count);
@@ -2568,82 +2568,33 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
 		struct channel_gk20a *c)
 {
 	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
-	struct device *d = dev_from_gk20a(g);
 	struct vm_gk20a *ch_vm = c->vm;
-	DEFINE_DMA_ATTRS(attrs);
-	struct sg_table *sgt;
 	int err = 0;
-	dma_addr_t iova;
 
 	gk20a_dbg_fn("");
 
-	patch_ctx->size = 128 * sizeof(u32);
-	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
-	patch_ctx->pages = dma_alloc_attrs(d, patch_ctx->size,
-				&iova, GFP_KERNEL,
-				&attrs);
-	if (!patch_ctx->pages)
-		return -ENOMEM;
-
-	patch_ctx->iova = iova;
-	err = gk20a_get_sgtable_from_pages(d, &sgt, patch_ctx->pages,
-			patch_ctx->iova, patch_ctx->size);
+	err = gk20a_gmmu_alloc_map_attr(ch_vm, DMA_ATTR_NO_KERNEL_MAPPING,
+			128 * sizeof(u32), &patch_ctx->mem);
 	if (err)
-		goto err_free;
-
-	patch_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, patch_ctx->size,
-					0, gk20a_mem_flag_none);
-	if (!patch_ctx->gpu_va)
-		goto err_free_sgtable;
-
-	gk20a_free_sgtable(&sgt);
+		return err;
 
 	gk20a_dbg_fn("done");
 	return 0;
-
-err_free_sgtable:
-	gk20a_free_sgtable(&sgt);
-err_free:
-	dma_free_attrs(d, patch_ctx->size,
-		patch_ctx->pages, patch_ctx->iova, &attrs);
-	patch_ctx->pages = NULL;
-	patch_ctx->iova = 0;
-	gk20a_err(dev_from_gk20a(g), "fail");
-	return err;
-}
-
-static void gr_gk20a_unmap_channel_patch_ctx(struct channel_gk20a *c)
-{
-	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
-	struct vm_gk20a *ch_vm = c->vm;
-
-	gk20a_dbg_fn("");
-
-	if (patch_ctx->gpu_va)
-		gk20a_gmmu_unmap(ch_vm, patch_ctx->gpu_va,
-			patch_ctx->size, gk20a_mem_flag_none);
-	patch_ctx->gpu_va = 0;
-	patch_ctx->data_count = 0;
 }
 
 static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
 {
 	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
 	struct gk20a *g = c->g;
-	struct device *d = dev_from_gk20a(g);
-	DEFINE_DMA_ATTRS(attrs);
 
 	gk20a_dbg_fn("");
 
-	gr_gk20a_unmap_channel_patch_ctx(c);
+	if (patch_ctx->mem.gpu_va)
+		gk20a_gmmu_unmap(c->vm, patch_ctx->mem.gpu_va,
+				patch_ctx->mem.size, gk20a_mem_flag_none);
 
-	if (patch_ctx->pages) {
-		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
-		dma_free_attrs(d, patch_ctx->size,
-			patch_ctx->pages, patch_ctx->iova, &attrs);
-		patch_ctx->pages = NULL;
-		patch_ctx->iova = 0;
-	}
+	gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &patch_ctx->mem);
+	patch_ctx->data_count = 0;
 }
 
 void gk20a_free_channel_ctx(struct channel_gk20a *c)
@@ -2757,7 +2708,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 	}
 
 	/* allocate patch buffer */
-	if (ch_ctx->patch_ctx.pages == NULL) {
+	if (ch_ctx->patch_ctx.mem.sgt == NULL) {
 		err = gr_gk20a_alloc_channel_patch_ctx(g, c);
 		if (err) {
 			gk20a_err(dev_from_gk20a(g),
@@ -2883,7 +2834,7 @@ int gk20a_free_obj_ctx(struct channel_gk20a *c,
 		gk20a_disable_channel(c,
 			!c->has_timedout,
 			timeout);
-		gr_gk20a_unmap_channel_patch_ctx(c);
+		gr_gk20a_free_channel_patch_ctx(c);
 	}
 
 	return 0;
@@ -6081,8 +6032,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 		gr_gk20a_ctx_patch_write(g, ch_ctx,
 					 addr, data, true);
 
-		vaddr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
-		vaddr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
+		vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
+		vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
 
 		gk20a_mem_wr32(context +
 			 ctxsw_prog_main_image_patch_count_o(),
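
The net effect in gr_gk20a.c is that both paths now funnel through the shared helpers: gk20a_gmmu_alloc_map_attr allocates and maps the 128-word patch buffer in one call, and teardown is just gk20a_gmmu_unmap plus gk20a_gmmu_free_attr, which is why the sg_table juggling and the err_free/err_free_sgtable unwind labels disappear. A minimal usage sketch of that shape; the stub types and exact parameter types are assumptions mirroring the calls in the diff, not the real nvgpu declarations:

	#include <stddef.h>

	struct gk20a;
	struct vm_gk20a;

	struct mem_desc {			/* stand-in descriptor */
		void *cpu_va;
		unsigned long long gpu_va;
		size_t size;
	};

	enum dma_attr { DMA_ATTR_NO_KERNEL_MAPPING };
	enum gk20a_mem_flag { gk20a_mem_flag_none };

	/* Declarations mirroring the helpers the new code relies on. */
	int gk20a_gmmu_alloc_map_attr(struct vm_gk20a *vm, enum dma_attr attr,
				      size_t size, struct mem_desc *mem);
	void gk20a_gmmu_unmap(struct vm_gk20a *vm, unsigned long long gpu_va,
			      size_t size, enum gk20a_mem_flag flag);
	void gk20a_gmmu_free_attr(struct gk20a *g, enum dma_attr attr,
				  struct mem_desc *mem);

	/* Allocation: one call replaces dma_alloc_attrs + sgtable + gmmu_map. */
	static int patch_buf_alloc(struct vm_gk20a *vm, struct mem_desc *mem)
	{
		return gk20a_gmmu_alloc_map_attr(vm, DMA_ATTR_NO_KERNEL_MAPPING,
						 128 * sizeof(unsigned int), mem);
	}

	/* Teardown: unmap if mapped, then hand the descriptor to the helper,
	 * which also clears the CPU-side fields (see the mm_gk20a.c hunk). */
	static void patch_buf_free(struct gk20a *g, struct vm_gk20a *vm,
				   struct mem_desc *mem)
	{
		if (mem->gpu_va)
			gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size,
					 gk20a_mem_flag_none);
		gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, mem);
	}
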
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index fa84337b..bde8afae 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1616,6 +1616,7 @@ void gk20a_gmmu_free_attr(struct gk20a *g, enum dma_attr attr,
 		}
 
 		mem->cpu_va = NULL;
+		mem->pages = NULL;
 	}
 
 	if (mem->sgt)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 91659537..df9ebec3 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -68,11 +68,7 @@ struct gpfifo_desc {
 };
 
 struct patch_desc {
-	struct page **pages;
-	u64 iova;
-	size_t size;
-	void *cpu_va;
-	u64 gpu_va;
+	struct mem_desc mem;
 	u32 data_count;
 };
 
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index fd8bb81b..8d0bb6cf 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -321,18 +321,20 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
-	patch_ctx->size = 128 * sizeof(u32);
-	patch_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, patch_ctx->size, 0);
-	if (!patch_ctx->gpu_va)
+	patch_ctx->mem.size = 128 * sizeof(u32);
+	patch_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch_vm,
+						  patch_ctx->mem.size, 0);
+	if (!patch_ctx->mem.gpu_va)
 		return -ENOMEM;
 
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_PATCH_CTX;
 	msg.handle = platform->virt_handle;
 	p->handle = c->virt_ctx;
-	p->patch_ctx_va = patch_ctx->gpu_va;
+	p->patch_ctx_va = patch_ctx->mem.gpu_va;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	if (err || msg.ret) {
-		gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
+		gk20a_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
+				 patch_ctx->mem.size, 0);
 		err = -ENOMEM;
 	}
 
@@ -347,7 +349,7 @@ static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
 
 	gk20a_dbg_fn("");
 
-	if (patch_ctx->gpu_va) {
+	if (patch_ctx->mem.gpu_va) {
 		struct tegra_vgpu_cmd_msg msg;
 		struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
 		int err;
@@ -358,8 +360,9 @@ static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
 		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 		WARN_ON(err || msg.ret);
 
-		gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
-		patch_ctx->gpu_va = 0;
+		gk20a_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
+				 patch_ctx->mem.size, 0);
+		patch_ctx->mem.gpu_va = 0;
 	}
 }
 
@@ -438,7 +441,7 @@ static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c,
 	}
 
 	/* allocate patch buffer */
-	if (ch_ctx->patch_ctx.pages == NULL) {
+	if (ch_ctx->patch_ctx.mem.pages == NULL) {
 		err = vgpu_gr_alloc_channel_patch_ctx(g, c);
 		if (err) {
 			gk20a_err(dev_from_gk20a(g),