diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 31 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 6 |
2 files changed, 25 insertions, 12 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 84fa1e5e..aa63e559 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -3231,6 +3231,10 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3231 | gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp); | 3231 | gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp); |
3232 | 3232 | ||
3233 | gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); | 3233 | gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); |
3234 | if (WARN(gr->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC, | ||
3235 | "too many pes per gpc\n")) | ||
3236 | goto clean_up; | ||
3237 | |||
3234 | gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS); | 3238 | gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS); |
3235 | 3239 | ||
3236 | if (!gr->gpc_count) { | 3240 | if (!gr->gpc_count) { |
@@ -3242,25 +3246,21 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3242 | gr->gpc_tpc_mask = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); | 3246 | gr->gpc_tpc_mask = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); |
3243 | gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); | 3247 | gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); |
3244 | gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); | 3248 | gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); |
3245 | gr->pes_tpc_count[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); | ||
3246 | gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); | ||
3247 | gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); | ||
3248 | gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); | ||
3249 | 3249 | ||
3250 | gr->gpc_skip_mask = | 3250 | gr->gpc_skip_mask = |
3251 | kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32), | 3251 | kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32), |
3252 | GFP_KERNEL); | 3252 | GFP_KERNEL); |
3253 | 3253 | ||
3254 | if (!gr->gpc_tpc_count || !gr->gpc_zcb_count || !gr->gpc_ppc_count || | 3254 | if (!gr->gpc_tpc_count || !gr->gpc_tpc_mask || !gr->gpc_zcb_count || |
3255 | !gr->pes_tpc_count[0] || !gr->pes_tpc_count[1] || | 3255 | !gr->gpc_ppc_count || !gr->gpc_skip_mask) |
3256 | !gr->pes_tpc_mask[0] || !gr->pes_tpc_mask[1] || !gr->gpc_skip_mask) | ||
3257 | goto clean_up; | 3256 | goto clean_up; |
3258 | 3257 | ||
3259 | gr->ppc_count = 0; | 3258 | gr->ppc_count = 0; |
3260 | gr->tpc_count = 0; | 3259 | gr->tpc_count = 0; |
3261 | gr->zcb_count = 0; | 3260 | gr->zcb_count = 0; |
3262 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 3261 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
3263 | tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r()); | 3262 | tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r() + |
3263 | gpc_stride * gpc_index); | ||
3264 | 3264 | ||
3265 | gr->gpc_tpc_count[gpc_index] = | 3265 | gr->gpc_tpc_count[gpc_index] = |
3266 | gr_gpc0_fs_gpc_num_available_tpcs_v(tmp); | 3266 | gr_gpc0_fs_gpc_num_available_tpcs_v(tmp); |
@@ -3278,6 +3278,15 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3278 | g->ops.gr.get_gpc_tpc_mask(g, gpc_index); | 3278 | g->ops.gr.get_gpc_tpc_mask(g, gpc_index); |
3279 | 3279 | ||
3280 | for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) { | 3280 | for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) { |
3281 | gr->pes_tpc_count[pes_index] = | ||
3282 | kzalloc(gr->gpc_count * sizeof(u32), | ||
3283 | GFP_KERNEL); | ||
3284 | gr->pes_tpc_mask[pes_index] = | ||
3285 | kzalloc(gr->gpc_count * sizeof(u32), | ||
3286 | GFP_KERNEL); | ||
3287 | if (!gr->pes_tpc_count[pes_index] || | ||
3288 | !gr->pes_tpc_mask[pes_index]) | ||
3289 | goto clean_up; | ||
3281 | 3290 | ||
3282 | tmp = gk20a_readl(g, | 3291 | tmp = gk20a_readl(g, |
3283 | gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) + | 3292 | gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) + |
@@ -3291,7 +3300,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3291 | } | 3300 | } |
3292 | 3301 | ||
3293 | gpc_new_skip_mask = 0; | 3302 | gpc_new_skip_mask = 0; |
3294 | if (gr->pes_tpc_count[0][gpc_index] + | 3303 | if (gr->pe_count_per_gpc > 1 && |
3304 | gr->pes_tpc_count[0][gpc_index] + | ||
3295 | gr->pes_tpc_count[1][gpc_index] == 5) { | 3305 | gr->pes_tpc_count[1][gpc_index] == 5) { |
3296 | pes_heavy_index = | 3306 | pes_heavy_index = |
3297 | gr->pes_tpc_count[0][gpc_index] > | 3307 | gr->pes_tpc_count[0][gpc_index] > |
@@ -3302,7 +3312,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3302 | (gr->pes_tpc_mask[pes_heavy_index][gpc_index] & | 3312 | (gr->pes_tpc_mask[pes_heavy_index][gpc_index] & |
3303 | (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1)); | 3313 | (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1)); |
3304 | 3314 | ||
3305 | } else if ((gr->pes_tpc_count[0][gpc_index] + | 3315 | } else if (gr->pe_count_per_gpc > 1 && |
3316 | (gr->pes_tpc_count[0][gpc_index] + | ||
3306 | gr->pes_tpc_count[1][gpc_index] == 4) && | 3317 | gr->pes_tpc_count[1][gpc_index] == 4) && |
3307 | (gr->pes_tpc_count[0][gpc_index] != | 3318 | (gr->pes_tpc_count[0][gpc_index] != |
3308 | gr->pes_tpc_count[1][gpc_index])) { | 3319 | gr->pes_tpc_count[1][gpc_index])) { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index e1e6e262..24123eea 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -36,6 +36,8 @@ | |||
36 | #define GK20A_FECS_UCODE_IMAGE "fecs.bin" | 36 | #define GK20A_FECS_UCODE_IMAGE "fecs.bin" |
37 | #define GK20A_GPCCS_UCODE_IMAGE "gpccs.bin" | 37 | #define GK20A_GPCCS_UCODE_IMAGE "gpccs.bin" |
38 | 38 | ||
39 | #define GK20A_GR_MAX_PES_PER_GPC 3 | ||
40 | |||
39 | enum /* global_ctx_buffer */ { | 41 | enum /* global_ctx_buffer */ { |
40 | CIRCULAR = 0, | 42 | CIRCULAR = 0, |
41 | PAGEPOOL = 1, | 43 | PAGEPOOL = 1, |
@@ -270,8 +272,8 @@ struct gr_gk20a { | |||
270 | u32 *gpc_tpc_mask; | 272 | u32 *gpc_tpc_mask; |
271 | u32 zcb_count; | 273 | u32 zcb_count; |
272 | u32 *gpc_zcb_count; | 274 | u32 *gpc_zcb_count; |
273 | u32 *pes_tpc_count[2]; | 275 | u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC]; |
274 | u32 *pes_tpc_mask[2]; | 276 | u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC]; |
275 | u32 *gpc_skip_mask; | 277 | u32 *gpc_skip_mask; |
276 | 278 | ||
277 | u32 bundle_cb_default_size; | 279 | u32 bundle_cb_default_size; |