2 files changed, 35 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 1b9ecd86..241e6525 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -673,7 +673,21 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
                                          struct channel_ctx_gk20a *ch_ctx)
 {
-        return nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem);
+        int err = 0;
+        err = nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem);
+        if (err)
+                return err;
+        if (ch_ctx->gr_ctx->mem.cpu_va) {
+                /* reset patch count if ucode has already processed it */
+                ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
+                                                &ch_ctx->gr_ctx->mem,
+                                        ctxsw_prog_main_image_patch_count_o());
+                nvgpu_log(g, gpu_dbg_info, "patch count reset to %d",
+                                        ch_ctx->patch_ctx.data_count);
+        }
+        return 0;
 }
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
@@ -686,6 +700,8 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
                nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
                             ctxsw_prog_main_image_patch_count_o(),
                             ch_ctx->patch_ctx.data_count);
+                nvgpu_log(g, gpu_dbg_info, "write patch count %d",
+                        ch_ctx->patch_ctx.data_count);
        }
 }
@@ -694,10 +710,20 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g,
                                    u32 addr, u32 data, bool patch)
 {
        if (patch) {
-                u32 patch_slot = ch_ctx->patch_ctx.data_count * 2;
+                u32 patch_slot = ch_ctx->patch_ctx.data_count *
+                                PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
+                if (patch_slot > (PATCH_CTX_SLOTS_MAX -
+                                PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) {
+                        nvgpu_err(g, "failed to access patch_slot %d",
+                                patch_slot);
+                        return;
+                }
                nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot, addr);
                nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot + 1, data);
                ch_ctx->patch_ctx.data_count++;
+                nvgpu_log(g, gpu_dbg_info,
+                        "patch addr = 0x%x data = 0x%x data_count %d",
+                        addr, data, ch_ctx->patch_ctx.data_count);
        } else {
                gk20a_writel(g, addr, data);
        }
@@ -1875,6 +1901,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
        virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
        virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
+        nvgpu_log(g, gpu_dbg_info, "write patch count = %d",
+                        ch_ctx->patch_ctx.data_count);
        nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
                 ch_ctx->patch_ctx.data_count);
@@ -2793,7 +2821,7 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
        gk20a_dbg_fn("");
        err = nvgpu_dma_alloc_map_flags_sys(ch_vm, NVGPU_DMA_NO_KERNEL_MAPPING,
-                                        128 * sizeof(u32), &patch_ctx->mem);
+                        PATCH_CTX_SLOTS_MAX * sizeof(u32), &patch_ctx->mem);
        if (err)
                return err;
@@ -2928,6 +2956,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a  *c,
        /* allocate patch buffer */
        if (ch_ctx->patch_ctx.mem.priv.sgt == NULL) {
+                ch_ctx->patch_ctx.data_count = 0;
                err = gr_gk20a_alloc_channel_patch_ctx(g, c);
                if (err) {
                        nvgpu_err(g,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 84eb8970..52b39c4f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -52,6 +52,9 @@
 #define GK20A_TIMEOUT_FPGA              100000 /* 100 sec */
+#define PATCH_CTX_SLOTS_MAX                     128
+#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY      2
 struct channel_gk20a;
 struct nvgpu_warpstate;