From 5ff6ebd2f49cb2db01e21ed2403231c23a30c3ad Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Mon, 16 Oct 2017 14:04:28 -0700 Subject: gpu: nvgpu: reset patch_ctx.data_count Patch buffer can hold 128 u32 entries. Each patch write takes a total of 2 u32 entries, 1 u32 for addr and 1 u32 for data. Ideally 64 entries could be written before the buffer overflows. The driver patches some things when creating the channel, and later when the context switch type is changed after the channel is loaded. Reset patch_ctx.data_count before beginning a patch write; otherwise the system might not be in a state to accept all patch writes even if the patch buffer has valid entries. If the patch buffer has non-zero entries, then the patch buffer would be read and all pri writes would be sent out. Once done, ucode updates the main header patch buffer count to 0. Without this fix, the priv errors below are seen on t186 platforms: SYS Write error for ADR 0, INFO 0d000200 and CODE badf1100 Error info decodes as: NV_PPRIV_SYS_PRIV_ERROR_INFO R[0x00122128] SUBID [29:24] 13 (?) LOCAL_ORDERING [22:22] 0 (I) PRIV_LEVEL [21:20] 0 (I) SENDING_RS [17:12] 0 (I) PENDING [ 9: 9] 1 (?) ORPHAN [ 8: 8] 0 (I) PRIV_MASTER [ 5: 0] 0 (I) Ctxsw ucode (subid 13 i.e. 0xd) makes only a few pri transactions at priv level 0. Patch buffer pri writes are one of those. 
Bug 200350539 Change-Id: If9e71b5fef4d85600d72a8a633a082d9261c3e1b Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1581591 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 35 ++++++++++++++++++++++++++++++++--- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 3 +++ 2 files changed, 35 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 1b9ecd86..241e6525 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -673,7 +673,21 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx) { - return nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem); + int err = 0; + + err = nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem); + if (err) + return err; + + if (ch_ctx->gr_ctx->mem.cpu_va) { + /* reset patch count if ucode has already processed it */ + ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g, + &ch_ctx->gr_ctx->mem, + ctxsw_prog_main_image_patch_count_o()); + nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", + ch_ctx->patch_ctx.data_count); + } + return 0; } void gr_gk20a_ctx_patch_write_end(struct gk20a *g, @@ -686,6 +700,8 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g, nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem, ctxsw_prog_main_image_patch_count_o(), ch_ctx->patch_ctx.data_count); + nvgpu_log(g, gpu_dbg_info, "write patch count %d", + ch_ctx->patch_ctx.data_count); } } @@ -694,10 +710,20 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g, u32 addr, u32 data, bool patch) { if (patch) { - u32 patch_slot = ch_ctx->patch_ctx.data_count * 2; + u32 patch_slot = ch_ctx->patch_ctx.data_count * + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; + if (patch_slot > (PATCH_CTX_SLOTS_MAX - + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) { + nvgpu_err(g, "failed to access patch_slot %d", + patch_slot); + return; + } nvgpu_mem_wr32(g, 
&ch_ctx->patch_ctx.mem, patch_slot, addr); nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot + 1, data); ch_ctx->patch_ctx.data_count++; + nvgpu_log(g, gpu_dbg_info, + "patch addr = 0x%x data = 0x%x data_count %d", + addr, data, ch_ctx->patch_ctx.data_count); } else { gk20a_writel(g, addr, data); } @@ -1875,6 +1901,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); + nvgpu_log(g, gpu_dbg_info, "write patch count = %d", + ch_ctx->patch_ctx.data_count); nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), ch_ctx->patch_ctx.data_count); @@ -2793,7 +2821,7 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, gk20a_dbg_fn(""); err = nvgpu_dma_alloc_map_flags_sys(ch_vm, NVGPU_DMA_NO_KERNEL_MAPPING, - 128 * sizeof(u32), &patch_ctx->mem); + PATCH_CTX_SLOTS_MAX * sizeof(u32), &patch_ctx->mem); if (err) return err; @@ -2928,6 +2956,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, /* allocate patch buffer */ if (ch_ctx->patch_ctx.mem.priv.sgt == NULL) { + ch_ctx->patch_ctx.data_count = 0; err = gr_gk20a_alloc_channel_patch_ctx(g, c); if (err) { nvgpu_err(g, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 84eb8970..52b39c4f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -52,6 +52,9 @@ #define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */ +#define PATCH_CTX_SLOTS_MAX 128 +#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY 2 + struct channel_gk20a; struct nvgpu_warpstate; -- cgit v1.2.2