From 0f8746130ba79ec82a4b0675bbe00ab1ba17d3f8 Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Mon, 23 Oct 2017 13:58:37 -0700
Subject: gpu: nvgpu: halify size of patch buffer

Allow per-chip calculation of the gr patch buffer size and set the
default to match the hw default of 512 address-data pair entries (4K).

bug 200350539

Change-Id: I6010c9e0304332825cb02612d3f10523ef27d128
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1584033
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/gk20a.h               |  1 +
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c            | 17 +++++++++++++++--
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h            |  9 ++++++++-
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c           |  1 +
 drivers/gpu/nvgpu/gp106/hal_gp106.c           |  1 +
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c           |  1 +
 drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c |  1 +
 drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c |  1 +
 8 files changed, 29 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 80d85d65..d7fdffb0 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -172,6 +172,7 @@ struct gpu_ops {
 		u32 (*get_num_pce)(struct gk20a *g);
 	} ce2;
 	struct {
+		u32 (*get_patch_slots)(struct gk20a *g);
 		int (*init_fs_state)(struct gk20a *g);
 		int (*init_preemption_state)(struct gk20a *g);
 		void (*access_smpc_reg)(struct gk20a *g, u32 quad, u32 offset);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 71fe44a3..3c3ddc80 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -714,7 +714,8 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g,
 	if (patch) {
 		u32 patch_slot = ch_ctx->patch_ctx.data_count *
 				PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
-		if (patch_slot > (PATCH_CTX_SLOTS_MAX -
+		if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE(
+					ch_ctx->patch_ctx.mem.size) -
 				PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) {
 			nvgpu_err(g, "failed to access patch_slot %d",
 				patch_slot);
@@ -2813,17 +2814,29 @@ static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
 	c->ch_ctx.gr_ctx = NULL;
 }
 
+u32 gr_gk20a_get_patch_slots(struct gk20a *g)
+{
+	return PATCH_CTX_SLOTS_PER_PAGE; /* default: one page of entries */
+}
+
 static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
 				struct channel_gk20a *c)
 {
 	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
 	struct vm_gk20a *ch_vm = c->vm;
+	u32 alloc_size;
 	int err = 0;
 
 	gk20a_dbg_fn("");
 
+	alloc_size = g->ops.gr.get_patch_slots(g) *
+		PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
+
+	nvgpu_log(g, gpu_dbg_info, "patch buffer size in entries: %d",
+		alloc_size);
+
 	err = nvgpu_dma_alloc_map_flags_sys(ch_vm, NVGPU_DMA_NO_KERNEL_MAPPING,
-			PATCH_CTX_SLOTS_MAX * sizeof(u32), &patch_ctx->mem);
+			alloc_size * sizeof(u32), &patch_ctx->mem);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 0a685d01..db1a9514 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -52,8 +52,14 @@
 
 #define GK20A_TIMEOUT_FPGA		100000 /* 100 sec */
 
-#define PATCH_CTX_SLOTS_MAX			128
+/*
+ * Default patch buffer is one page: PATCH_CTX_SLOTS_PER_PAGE address/data
+ * entries of two u32 slots each (512 entries with 4KB pages).
+ */
 #define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY	2
+#define PATCH_CTX_SLOTS_PER_PAGE \
+	(PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * sizeof(u32)))
+#define PATCH_CTX_ENTRIES_FROM_SIZE(size) ((size)/sizeof(u32)) /* u32 slots */
 
 struct channel_gk20a;
 struct nvgpu_warpstate;
@@ -756,5 +762,6 @@ void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
 					       u32 **ovr_perf_regs);
 void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
 					struct nvgpu_mem *mem);
+u32 gr_gk20a_get_patch_slots(struct gk20a *g);
 
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index d081fb24..69afb9bc 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -181,6 +181,7 @@ static const struct gpu_ops gm20b_ops = {
 		.isr_nonstall = gk20a_ce2_nonstall_isr,
 	},
 	.gr = {
+		.get_patch_slots = gr_gk20a_get_patch_slots,
 		.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
 		.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
 		.cb_size_default = gr_gm20b_cb_size_default,
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index c5b62948..f576278d 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -235,6 +235,7 @@ static const struct gpu_ops gp106_ops = {
 		.isr_nonstall = gp10b_ce_nonstall_isr,
 	},
 	.gr = {
+		.get_patch_slots = gr_gk20a_get_patch_slots,
 		.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
 		.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
 		.cb_size_default = gr_gp106_cb_size_default,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 7b5cc2ac..cbec89bc 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -190,6 +190,7 @@ static const struct gpu_ops gp10b_ops = {
 		.isr_nonstall = gp10b_ce_nonstall_isr,
 	},
 	.gr = {
+		.get_patch_slots = gr_gk20a_get_patch_slots,
 		.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
 		.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
 		.cb_size_default = gr_gp10b_cb_size_default,
diff --git a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c
index 81bcdc21..29eaf80b 100644
--- a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c
+++ b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c
@@ -77,6 +77,7 @@ static const struct gpu_ops vgpu_gm20b_ops = {
 		.get_num_pce = vgpu_ce_get_num_pce,
 	},
 	.gr = {
+		.get_patch_slots = gr_gk20a_get_patch_slots,
 		.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
 		.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
 		.cb_size_default = gr_gm20b_cb_size_default,
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
index 04a7349a..8dd63144 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -90,6 +90,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.get_num_pce = vgpu_ce_get_num_pce,
 	},
 	.gr = {
+		.get_patch_slots = gr_gk20a_get_patch_slots,
 		.init_gpc_mmu = gr_gm20b_init_gpc_mmu,
 		.bundle_cb_defaults = gr_gm20b_bundle_cb_defaults,
 		.cb_size_default = gr_gp10b_cb_size_default,
-- 
cgit v1.2.2