summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 35
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.h 3
2 files changed, 35 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 1b9ecd86..241e6525 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -673,7 +673,21 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
673int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, 673int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
674 struct channel_ctx_gk20a *ch_ctx) 674 struct channel_ctx_gk20a *ch_ctx)
675{ 675{
676 return nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem); 676 int err = 0;
677
678 err = nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem);
679 if (err)
680 return err;
681
682 if (ch_ctx->gr_ctx->mem.cpu_va) {
683 /* reset patch count if ucode has already processed it */
684 ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
685 &ch_ctx->gr_ctx->mem,
686 ctxsw_prog_main_image_patch_count_o());
687 nvgpu_log(g, gpu_dbg_info, "patch count reset to %d",
688 ch_ctx->patch_ctx.data_count);
689 }
690 return 0;
677} 691}
678 692
679void gr_gk20a_ctx_patch_write_end(struct gk20a *g, 693void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
@@ -686,6 +700,8 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
686 nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem, 700 nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
687 ctxsw_prog_main_image_patch_count_o(), 701 ctxsw_prog_main_image_patch_count_o(),
688 ch_ctx->patch_ctx.data_count); 702 ch_ctx->patch_ctx.data_count);
703 nvgpu_log(g, gpu_dbg_info, "write patch count %d",
704 ch_ctx->patch_ctx.data_count);
689 } 705 }
690} 706}
691 707
@@ -694,10 +710,20 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g,
694 u32 addr, u32 data, bool patch) 710 u32 addr, u32 data, bool patch)
695{ 711{
696 if (patch) { 712 if (patch) {
697 u32 patch_slot = ch_ctx->patch_ctx.data_count * 2; 713 u32 patch_slot = ch_ctx->patch_ctx.data_count *
714 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
715 if (patch_slot > (PATCH_CTX_SLOTS_MAX -
716 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) {
717 nvgpu_err(g, "failed to access patch_slot %d",
718 patch_slot);
719 return;
720 }
698 nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot, addr); 721 nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot, addr);
699 nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot + 1, data); 722 nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot + 1, data);
700 ch_ctx->patch_ctx.data_count++; 723 ch_ctx->patch_ctx.data_count++;
724 nvgpu_log(g, gpu_dbg_info,
725 "patch addr = 0x%x data = 0x%x data_count %d",
726 addr, data, ch_ctx->patch_ctx.data_count);
701 } else { 727 } else {
702 gk20a_writel(g, addr, data); 728 gk20a_writel(g, addr, data);
703 } 729 }
@@ -1875,6 +1901,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1875 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); 1901 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
1876 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); 1902 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
1877 1903
1904 nvgpu_log(g, gpu_dbg_info, "write patch count = %d",
1905 ch_ctx->patch_ctx.data_count);
1878 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), 1906 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
1879 ch_ctx->patch_ctx.data_count); 1907 ch_ctx->patch_ctx.data_count);
1880 1908
@@ -2793,7 +2821,7 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
2793 gk20a_dbg_fn(""); 2821 gk20a_dbg_fn("");
2794 2822
2795 err = nvgpu_dma_alloc_map_flags_sys(ch_vm, NVGPU_DMA_NO_KERNEL_MAPPING, 2823 err = nvgpu_dma_alloc_map_flags_sys(ch_vm, NVGPU_DMA_NO_KERNEL_MAPPING,
2796 128 * sizeof(u32), &patch_ctx->mem); 2824 PATCH_CTX_SLOTS_MAX * sizeof(u32), &patch_ctx->mem);
2797 if (err) 2825 if (err)
2798 return err; 2826 return err;
2799 2827
@@ -2928,6 +2956,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
2928 2956
2929 /* allocate patch buffer */ 2957 /* allocate patch buffer */
2930 if (ch_ctx->patch_ctx.mem.priv.sgt == NULL) { 2958 if (ch_ctx->patch_ctx.mem.priv.sgt == NULL) {
2959 ch_ctx->patch_ctx.data_count = 0;
2931 err = gr_gk20a_alloc_channel_patch_ctx(g, c); 2960 err = gr_gk20a_alloc_channel_patch_ctx(g, c);
2932 if (err) { 2961 if (err) {
2933 nvgpu_err(g, 2962 nvgpu_err(g,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 84eb8970..52b39c4f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -52,6 +52,9 @@
52 52
53#define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */ 53#define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */
54 54
55#define PATCH_CTX_SLOTS_MAX 128
56#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY 2
57
55struct channel_gk20a; 58struct channel_gk20a;
56struct nvgpu_warpstate; 59struct nvgpu_warpstate;
57 60