summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
diff options
context:
space:
mode:
authorSeema Khowala <seemaj@nvidia.com>2017-10-16 17:04:28 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-10-21 19:05:36 -0400
commit5ff6ebd2f49cb2db01e21ed2403231c23a30c3ad (patch)
treeff4cfcf413218843b4838d6e6f644b8cef649335 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent2d4024b0e6ed9e0bec144a3f56c8c0a6b3ab8f96 (diff)
gpu: nvgpu: reset patch_ctx.data_count
The patch buffer can hold 128 u32 entries. Each patch write takes a total of 2 u32 entries: 1 u32 for the address and 1 u32 for the data. Ideally, 64 entries can be written before the buffer overflows. The driver patches some things when creating the channel, and later when the context-switch type is changed after the channel is loaded. Reset patch_ctx.data_count before beginning a patch write; otherwise the system might not be in a state to accept all patch writes even if the patch buffer has valid entries. If the patch buffer has non-zero entries, the patch buffer is read and all pri writes are sent out; once done, the ucode updates the main-header patch buffer count to 0. Without this fix, the priv errors below are seen on t186 platforms: SYS Write error for ADR 0, INFO 0d000200 and CODE badf1100. The error info decodes as: NV_PPRIV_SYS_PRIV_ERROR_INFO R[0x00122128] SUBID [29:24] 13 (?) LOCAL_ORDERING [22:22] 0 (I) PRIV_LEVEL [21:20] 0 (I) SENDING_RS [17:12] 0 (I) PENDING [ 9: 9] 1 (?) ORPHAN [ 8: 8] 0 (I) PRIV_MASTER [ 5: 0] 0 (I) The ctxsw ucode (subid 13, i.e. 0xd) makes only a few pri transactions at priv level 0; patch-buffer pri writes are among them. Bug 200350539 Change-Id: If9e71b5fef4d85600d72a8a633a082d9261c3e1b Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1581591 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c35
1 file changed, 32 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 1b9ecd86..241e6525 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -673,7 +673,21 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx)
 {
-	return nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem);
+	int err = 0;
+
+	err = nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem);
+	if (err)
+		return err;
+
+	if (ch_ctx->gr_ctx->mem.cpu_va) {
+		/* reset patch count if ucode has already processed it */
+		ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
+				&ch_ctx->gr_ctx->mem,
+				ctxsw_prog_main_image_patch_count_o());
+		nvgpu_log(g, gpu_dbg_info, "patch count reset to %d",
+				ch_ctx->patch_ctx.data_count);
+	}
+	return 0;
 }
 
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
@@ -686,6 +700,8 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 		nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
 			ctxsw_prog_main_image_patch_count_o(),
 			ch_ctx->patch_ctx.data_count);
+		nvgpu_log(g, gpu_dbg_info, "write patch count %d",
+			ch_ctx->patch_ctx.data_count);
 	}
 }
 
@@ -694,10 +710,20 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g,
 		u32 addr, u32 data, bool patch)
 {
 	if (patch) {
-		u32 patch_slot = ch_ctx->patch_ctx.data_count * 2;
+		u32 patch_slot = ch_ctx->patch_ctx.data_count *
+				PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
+		if (patch_slot > (PATCH_CTX_SLOTS_MAX -
+				PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) {
+			nvgpu_err(g, "failed to access patch_slot %d",
+				patch_slot);
+			return;
+		}
 		nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot, addr);
 		nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot + 1, data);
 		ch_ctx->patch_ctx.data_count++;
+		nvgpu_log(g, gpu_dbg_info,
+			"patch addr = 0x%x data = 0x%x data_count %d",
+			addr, data, ch_ctx->patch_ctx.data_count);
 	} else {
 		gk20a_writel(g, addr, data);
 	}
@@ -1875,6 +1901,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
 	virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
 
+	nvgpu_log(g, gpu_dbg_info, "write patch count = %d",
+		ch_ctx->patch_ctx.data_count);
 	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
 		ch_ctx->patch_ctx.data_count);
 
@@ -2793,7 +2821,7 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
 	gk20a_dbg_fn("");
 
 	err = nvgpu_dma_alloc_map_flags_sys(ch_vm, NVGPU_DMA_NO_KERNEL_MAPPING,
-			128 * sizeof(u32), &patch_ctx->mem);
+			PATCH_CTX_SLOTS_MAX * sizeof(u32), &patch_ctx->mem);
 	if (err)
 		return err;
 
@@ -2928,6 +2956,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 
 	/* allocate patch buffer */
 	if (ch_ctx->patch_ctx.mem.priv.sgt == NULL) {
+		ch_ctx->patch_ctx.data_count = 0;
 		err = gr_gk20a_alloc_channel_patch_ctx(g, c);
 		if (err) {
 			nvgpu_err(g,