From f14152c081d94710dbde843b8dcd9b3981afb831 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Thu, 5 May 2016 18:01:12 -0700
Subject: gpu: nvgpu: Support 3 PEs per GPC

The old code supported at most 2 PEs per GPC. Support 3 PEs per GPC,
and add a check so that we get a warning if hardware supports more
than that.

JIRA DNVGPU-6

Change-Id: Id6061567bad20474f4b4a7a0959be3426e5e4828
Signed-off-by: Terje Bergstrom
Reviewed-on: http://git-master/r/1142440
---
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 84fa1e5e..aa63e559 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3231,6 +3231,10 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 	gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
 	gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
+	if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC,
+		 "too many pes per gpc\n"))
+		goto clean_up;
+
 	gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS);
 
 	if (!gr->gpc_count) {
@@ -3242,25 +3246,21 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 	gr->gpc_tpc_mask = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
 	gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
 	gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	gr->pes_tpc_count[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
 	gr->gpc_skip_mask =
 		kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
 			GFP_KERNEL);
 
-	if (!gr->gpc_tpc_count || !gr->gpc_zcb_count || !gr->gpc_ppc_count ||
-	    !gr->pes_tpc_count[0] || !gr->pes_tpc_count[1] ||
-	    !gr->pes_tpc_mask[0] || !gr->pes_tpc_mask[1] || !gr->gpc_skip_mask)
+	if (!gr->gpc_tpc_count || !gr->gpc_tpc_mask || !gr->gpc_zcb_count ||
+	    !gr->gpc_ppc_count || !gr->gpc_skip_mask)
 		goto clean_up;
 
 	gr->ppc_count = 0;
 	gr->tpc_count = 0;
 	gr->zcb_count = 0;
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r());
+		tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r() +
+				     gpc_stride * gpc_index);
 
 		gr->gpc_tpc_count[gpc_index] =
 			gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
@@ -3278,6 +3278,15 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 			g->ops.gr.get_gpc_tpc_mask(g, gpc_index);
 
 		for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {
+			gr->pes_tpc_count[pes_index] =
+				kzalloc(gr->gpc_count * sizeof(u32),
+					GFP_KERNEL);
+			gr->pes_tpc_mask[pes_index] =
+				kzalloc(gr->gpc_count * sizeof(u32),
+					GFP_KERNEL);
+			if (!gr->pes_tpc_count[pes_index] ||
+			    !gr->pes_tpc_mask[pes_index])
+				goto clean_up;
 
 			tmp = gk20a_readl(g,
 				gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
@@ -3291,7 +3300,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 		}
 
 		gpc_new_skip_mask = 0;
-		if (gr->pes_tpc_count[0][gpc_index] +
+		if (gr->pe_count_per_gpc > 1 &&
+		    gr->pes_tpc_count[0][gpc_index] +
 		    gr->pes_tpc_count[1][gpc_index] == 5) {
 			pes_heavy_index =
 				gr->pes_tpc_count[0][gpc_index] >
@@ -3302,7 +3312,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 			(gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
 			(gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
 
-		} else if ((gr->pes_tpc_count[0][gpc_index] +
+		} else if (gr->pe_count_per_gpc > 1 &&
+			   (gr->pes_tpc_count[0][gpc_index] +
 			   gr->pes_tpc_count[1][gpc_index] == 4) &&
 			   (gr->pes_tpc_count[0][gpc_index] !=
 			    gr->pes_tpc_count[1][gpc_index])) {
--
cgit v1.2.2
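
For readers who want the shape of the change without the kernel tree, here
is a minimal userspace sketch of the allocation pattern the patch moves to:
a bounds check against the static array size, followed by one allocation per
PES index instead of hard-coded indices 0 and 1. The struct layout, the
MAX_PES_PER_GPC constant, and calloc() below are simplified stand-ins for
the nvgpu types, GK20A_GR_MAX_PES_PER_GPC, and kzalloc(), and the allocation
is hoisted out of the per-GPC loop for brevity; this illustrates the idea,
not the driver's actual code.

/*
 * Standalone sketch of the per-PES allocation pattern, using plain C
 * and calloc() in place of the kernel's kzalloc(). All names here are
 * simplified stand-ins for the nvgpu definitions.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define MAX_PES_PER_GPC 3	/* stand-in for GK20A_GR_MAX_PES_PER_GPC */

struct gr_config {
	uint32_t gpc_count;
	uint32_t pe_count_per_gpc;
	/* One dynamically sized array per PES, indexed by GPC. */
	uint32_t *pes_tpc_count[MAX_PES_PER_GPC];
	uint32_t *pes_tpc_mask[MAX_PES_PER_GPC];
};

static int gr_config_init(struct gr_config *gr)
{
	uint32_t pes_index;

	/*
	 * Mirror of the patch's WARN() guard: refuse to continue if the
	 * hardware reports more PEs per GPC than the static arrays hold.
	 */
	if (gr->pe_count_per_gpc > MAX_PES_PER_GPC) {
		fprintf(stderr, "too many pes per gpc\n");
		return -1;
	}

	/*
	 * Allocate only as many per-PES arrays as the chip actually has,
	 * instead of hard-coding indices 0 and 1 as the old code did.
	 */
	for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {
		gr->pes_tpc_count[pes_index] =
			calloc(gr->gpc_count, sizeof(uint32_t));
		gr->pes_tpc_mask[pes_index] =
			calloc(gr->gpc_count, sizeof(uint32_t));
		if (!gr->pes_tpc_count[pes_index] ||
		    !gr->pes_tpc_mask[pes_index])
			return -1;	/* caller frees, as clean_up: does */
	}
	return 0;
}

int main(void)
{
	struct gr_config gr = { .gpc_count = 2, .pe_count_per_gpc = 3 };

	if (gr_config_init(&gr))
		return 1;
	printf("allocated %u PES arrays per GPC\n", gr.pe_count_per_gpc);
	return 0;
}

Allocating per pes_index means a chip with 3 PEs per GPC gets all three
arrays, while the up-front bounds check turns a future chip that exceeds
the static array size into a loud warning rather than an out-of-bounds
write. This also explains the new pe_count_per_gpc > 1 guards in the
TPC-balancing code above: the 5-TPC and 4-TPC rebalancing reads index 1,
which only exists when a GPC has more than one PES.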