path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
author    Terje Bergstrom <tbergstrom@nvidia.com>    2016-05-05 21:01:12 -0400
committer Terje Bergstrom <tbergstrom@nvidia.com>    2016-05-09 14:57:47 -0400
commit    f14152c081d94710dbde843b8dcd9b3981afb831 (patch)
tree      6ebbe3f66ba866d3ef1af6728199a3db597e3575 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent    df05d2a7c214bc8cdb887f1609853d0f424ef6f1 (diff)
gpu: nvgpu: Support 3 PEs per GPC
The old code supported at most 2 PEs per GPC. Support 3 PEs per GPC, and warn
if the hardware reports more than that.

JIRA DNVGPU-6

Change-Id: Id6061567bad20474f4b4a7a0959be3426e5e4828
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1142440
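The diff below touches only gr_gk20a.c, so the definition of GK20A_GR_MAX_PES_PER_GPC itself is not shown; raising it to 3 in the gk20a headers is assumed. A minimal userspace sketch of the guard described above, with fprintf standing in for the kernel's WARN():

	/* Sketch only: mirrors the bound check added in the diff below. The value 3
	 * for GK20A_GR_MAX_PES_PER_GPC is an assumption; the real define lives in
	 * the gk20a headers, outside this file's diff. */
	#include <stdio.h>

	#define GK20A_GR_MAX_PES_PER_GPC 3

	static int check_pe_count(unsigned int pe_count_per_gpc)
	{
		if (pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) {
			/* the driver uses WARN(..., "too many pes per gpc\n") */
			fprintf(stderr, "too many pes per gpc\n");
			return -1;	/* bail out, as goto clean_up does */
		}
		return 0;
	}

	int main(void)
	{
		return check_pe_count(3) ? 1 : 0;	/* 3 PEs per GPC now passes */
	}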
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 31 +++++++++++++++++++++++----------
1 file changed, 21 insertions(+), 10 deletions(-)
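As the hunks below show, the fixed pes_tpc_count[0]/[1] and pes_tpc_mask[0]/[1] allocations are replaced by allocations indexed by pes_index, so the same loop covers 2 or 3 PEs per GPC. A simplified userspace sketch of that allocation pattern (hypothetical struct and names; calloc stands in for kzalloc, and cleanup is left to the caller):

	#include <stdlib.h>

	#define MAX_PES_PER_GPC 3	/* assumed mirror of GK20A_GR_MAX_PES_PER_GPC */

	struct gr_config {
		unsigned int gpc_count;
		unsigned int pe_count_per_gpc;
		unsigned int *pes_tpc_count[MAX_PES_PER_GPC];	/* per-PES, per-GPC TPC counts */
		unsigned int *pes_tpc_mask[MAX_PES_PER_GPC];	/* per-PES, per-GPC TPC masks */
	};

	static int alloc_pes_arrays(struct gr_config *gr)
	{
		unsigned int pes;

		/* Allocate one array per PES the chip reports, instead of
		 * hard-coding indices 0 and 1. */
		for (pes = 0; pes < gr->pe_count_per_gpc; pes++) {
			gr->pes_tpc_count[pes] = calloc(gr->gpc_count, sizeof(unsigned int));
			gr->pes_tpc_mask[pes] = calloc(gr->gpc_count, sizeof(unsigned int));
			if (!gr->pes_tpc_count[pes] || !gr->pes_tpc_mask[pes])
				return -1;	/* caller frees anything already allocated */
		}
		return 0;
	}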
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 84fa1e5e..aa63e559 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3231,6 +3231,10 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 	gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
 
 	gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
+	if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC,
+		 "too many pes per gpc\n"))
+		goto clean_up;
+
 	gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS);
 
 	if (!gr->gpc_count) {
@@ -3242,25 +3246,21 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 	gr->gpc_tpc_mask = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
 	gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
 	gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	gr->pes_tpc_count[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
 
 	gr->gpc_skip_mask =
 		kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
 			GFP_KERNEL);
 
-	if (!gr->gpc_tpc_count || !gr->gpc_zcb_count || !gr->gpc_ppc_count ||
-	    !gr->pes_tpc_count[0] || !gr->pes_tpc_count[1] ||
-	    !gr->pes_tpc_mask[0] || !gr->pes_tpc_mask[1] || !gr->gpc_skip_mask)
+	if (!gr->gpc_tpc_count || !gr->gpc_tpc_mask || !gr->gpc_zcb_count ||
+	    !gr->gpc_ppc_count || !gr->gpc_skip_mask)
 		goto clean_up;
 
 	gr->ppc_count = 0;
 	gr->tpc_count = 0;
 	gr->zcb_count = 0;
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r());
+		tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r() +
+				  gpc_stride * gpc_index);
 
 		gr->gpc_tpc_count[gpc_index] =
 			gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
@@ -3278,6 +3278,15 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 				g->ops.gr.get_gpc_tpc_mask(g, gpc_index);
 
 		for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {
+			gr->pes_tpc_count[pes_index] =
+				kzalloc(gr->gpc_count * sizeof(u32),
+					GFP_KERNEL);
+			gr->pes_tpc_mask[pes_index] =
+				kzalloc(gr->gpc_count * sizeof(u32),
+					GFP_KERNEL);
+			if (!gr->pes_tpc_count[pes_index] ||
+			    !gr->pes_tpc_mask[pes_index])
+				goto clean_up;
 
 			tmp = gk20a_readl(g,
 				gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
@@ -3291,7 +3300,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 		}
 
 		gpc_new_skip_mask = 0;
-		if (gr->pes_tpc_count[0][gpc_index] +
+		if (gr->pe_count_per_gpc > 1 &&
+		    gr->pes_tpc_count[0][gpc_index] +
 		    gr->pes_tpc_count[1][gpc_index] == 5) {
 			pes_heavy_index =
 				gr->pes_tpc_count[0][gpc_index] >
@@ -3302,7 +3312,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 				(gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
 				(gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
 
-		} else if ((gr->pes_tpc_count[0][gpc_index] +
+		} else if (gr->pe_count_per_gpc > 1 &&
+			   (gr->pes_tpc_count[0][gpc_index] +
 			   gr->pes_tpc_count[1][gpc_index] == 4) &&
 			   (gr->pes_tpc_count[0][gpc_index] !=
 			    gr->pes_tpc_count[1][gpc_index])) {
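The skip-mask computation above applies the classic x & (x - 1) idiom to the heavier PES's TPC mask; that expression clears the lowest set bit, i.e. it drops one TPC from the set. A standalone demonstration, independent of the driver:

	#include <stdio.h>

	int main(void)
	{
		unsigned int mask = 0x7;			/* TPCs 0..2 present */
		unsigned int trimmed = mask & (mask - 1);	/* lowest set bit cleared */

		printf("mask=0x%x trimmed=0x%x\n", mask, trimmed);	/* mask=0x7 trimmed=0x6 */
		return 0;
	}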