From 20294264468eecc8c1a065aa83d5f1b46ff07e7e Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Mon, 23 Oct 2017 14:01:28 -0700
Subject: gpu: nvgpu: gv1xx: resize patch buffer

Follow the sizing consideration in bug 1753763 to support dynamic TPC modes
and subcontexts.

bug 200350539

Change-Id: Ibbdbf02f9c2ea3f082c1b2810ae7176b0775d461
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1584034
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gv100/gr_gv100.c  | 53 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gv100/gr_gv100.h  |  2 +-
 drivers/gpu/nvgpu/gv100/hal_gv100.c |  1 +
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c |  1 +
 4 files changed, 56 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index 8a4b88b4..430c7cd0 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -294,3 +294,56 @@ void gr_gv100_load_tpc_mask(struct gk20a *g)
 	gk20a_writel(g, gr_fe_tpc_fs_r(0), u64_lo32(pes_tpc_mask));
 	gk20a_writel(g, gr_fe_tpc_fs_r(1), u64_hi32(pes_tpc_mask));
 }
+
+/*
+ * Number of patch buffer slots to reserve on gv1xx.
+ *
+ * Starts from the per-subcontext PE/PL table update cost, adds the slots
+ * for a partition mode change and the gr_gk20a_get_patch_slots() amount,
+ * then pads the result in units of PATCH_CTX_SLOTS_PER_PAGE.
+ */
+u32 gr_gv100_get_patch_slots(struct gk20a *g)
+{
+	struct fifo_gk20a *fifo = &g->fifo;
+	struct gr_gk20a *gr_state = &g->gr;
+	u32 per_subctx;
+	u32 total;
+
+	/*
+	 * Slots needed by a single subcontext:
+	 *   - one for the CMD to update the PE table
+	 *   - PE table contents, each patch buffer update writes 32 TPCs
+	 *   - PL table contents, each patch buffer update configures 4 TPCs
+	 */
+	per_subctx = 1;
+	per_subctx += DIV_ROUND_UP(gr_state->tpc_count, 32);
+	per_subctx += DIV_ROUND_UP(gr_state->tpc_count, 4);
+
+	/*
+	 * All subcontexts need their own set of these slots
+	 */
+	total = per_subctx * fifo->t19x.max_subctx_count;
+
+	/*
+	 * A partition mode change takes two more slots, because
+	 * DYNAMIC -> STATIC has to go through DYNAMIC -> NONE -> STATIC
+	 */
+	total += 2;
+
+	/*
+	 * Slots of the current patch buffer, as reported by gk20a
+	 */
+	total += gr_gk20a_get_patch_slots(g);
+
+	/*
+	 * Round up to a multiple of PATCH_CTX_SLOTS_PER_PAGE (4K size)
+	 */
+	total = ALIGN(total, PATCH_CTX_SLOTS_PER_PAGE);
+
+	/*
+	 * Extra room to accommodate an additional TPC partition update
+	 */
+	total += 2 * PATCH_CTX_SLOTS_PER_PAGE;
+
+	return total;
+}
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h
index 460b05ae..612f76f9 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -32,5 +32,5 @@ void gr_gv100_init_sm_id_table(struct gk20a *g);
 void gr_gv100_program_sm_id_numbering(struct gk20a *g,
 					u32 gpc, u32 tpc, u32 smid);
 int gr_gv100_load_smid_config(struct gk20a *g);
-
+u32 gr_gv100_get_patch_slots(struct gk20a *g);
 #endif
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index e51b4446..61e9e46d 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -261,6 +261,7 @@ static const struct gpu_ops gv100_ops = {
 		.get_num_pce = gv11b_ce_get_num_pce,
 	},
 	.gr = {
+		.get_patch_slots = gr_gv100_get_patch_slots,
 		.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
 		.bundle_cb_defaults = gr_gv100_bundle_cb_defaults,
 		.cb_size_default = gr_gv100_cb_size_default,
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 400c2ad0..4de9786b 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -227,6 +227,7 @@ static const struct gpu_ops gv11b_ops = {
 		.get_num_pce = gv11b_ce_get_num_pce,
 	},
 	.gr = {
+		.get_patch_slots = gr_gv100_get_patch_slots,
 		.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
 		.bundle_cb_defaults = gr_gv11b_bundle_cb_defaults,
 		.cb_size_default = gr_gv11b_cb_size_default,
-- 
cgit v1.2.2