From 5ff6ebd2f49cb2db01e21ed2403231c23a30c3ad Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Mon, 16 Oct 2017 14:04:28 -0700 Subject: gpu: nvgpu: reset patch_ctx.data_count Patch buffer can hold 128 u32 entries. Each patch write takes a total of 2 u32 entries, 1 u32 for addr and 1 u32 for data. Ideally 64 entries could be written before the buffer overflows. The driver patches some things when creating the channel, and later when the context switch type is changed after the channel is loaded. Reset patch_ctx.data_count before beginning a patch write; otherwise the system might not be in a state to accept all patch writes even if the patch buffer has valid entries. If the patch buffer has non-zero entries, then the patch buffer would be read and all pri writes would be sent out. Once done, ucode updates the main header patch buffer count to 0. Without this fix, the priv errors below are seen on t186 platforms: SYS Write error for ADR 0, INFO 0d000200 and CODE badf1100 Error info decodes as: NV_PPRIV_SYS_PRIV_ERROR_INFO R[0x00122128] SUBID [29:24] 13 (?) LOCAL_ORDERING [22:22] 0 (I) PRIV_LEVEL [21:20] 0 (I) SENDING_RS [17:12] 0 (I) PENDING [ 9: 9] 1 (?) ORPHAN [ 8: 8] 0 (I) PRIV_MASTER [ 5: 0] 0 (I) Ctxsw ucode (subid 13 i.e. 0xd) makes only a few pri transactions at priv level 0. Patch buffer pri writes are one of those. 
Bug 200350539 Change-Id: If9e71b5fef4d85600d72a8a633a082d9261c3e1b Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1581591 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 35 ++++++++++++++++++++++++++++++++--- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 3 +++ 2 files changed, 35 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 1b9ecd86..241e6525 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -673,7 +673,21 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx) { - return nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem); + int err = 0; + + err = nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem); + if (err) + return err; + + if (ch_ctx->gr_ctx->mem.cpu_va) { + /* reset patch count if ucode has already processed it */ + ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g, + &ch_ctx->gr_ctx->mem, + ctxsw_prog_main_image_patch_count_o()); + nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", + ch_ctx->patch_ctx.data_count); + } + return 0; } void gr_gk20a_ctx_patch_write_end(struct gk20a *g, @@ -686,6 +700,8 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g, nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem, ctxsw_prog_main_image_patch_count_o(), ch_ctx->patch_ctx.data_count); + nvgpu_log(g, gpu_dbg_info, "write patch count %d", + ch_ctx->patch_ctx.data_count); } } @@ -694,10 +710,20 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g, u32 addr, u32 data, bool patch) { if (patch) { - u32 patch_slot = ch_ctx->patch_ctx.data_count * 2; + u32 patch_slot = ch_ctx->patch_ctx.data_count * + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; + if (patch_slot > (PATCH_CTX_SLOTS_MAX - + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) { + nvgpu_err(g, "failed to access patch_slot %d", + patch_slot); + return; + } nvgpu_mem_wr32(g, 
&ch_ctx->patch_ctx.mem, patch_slot, addr); nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot + 1, data); ch_ctx->patch_ctx.data_count++; + nvgpu_log(g, gpu_dbg_info, + "patch addr = 0x%x data = 0x%x data_count %d", + addr, data, ch_ctx->patch_ctx.data_count); } else { gk20a_writel(g, addr, data); } @@ -1875,6 +1901,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); + nvgpu_log(g, gpu_dbg_info, "write patch count = %d", + ch_ctx->patch_ctx.data_count); nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), ch_ctx->patch_ctx.data_count); @@ -2793,7 +2821,7 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, gk20a_dbg_fn(""); err = nvgpu_dma_alloc_map_flags_sys(ch_vm, NVGPU_DMA_NO_KERNEL_MAPPING, - 128 * sizeof(u32), &patch_ctx->mem); + PATCH_CTX_SLOTS_MAX * sizeof(u32), &patch_ctx->mem); if (err) return err; @@ -2928,6 +2956,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, /* allocate patch buffer */ if (ch_ctx->patch_ctx.mem.priv.sgt == NULL) { + ch_ctx->patch_ctx.data_count = 0; err = gr_gk20a_alloc_channel_patch_ctx(g, c); if (err) { nvgpu_err(g, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 84eb8970..52b39c4f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -52,6 +52,9 @@ #define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */ +#define PATCH_CTX_SLOTS_MAX 128 +#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY 2 + struct channel_gk20a; struct nvgpu_warpstate; -- cgit v1.2.2