summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
diff options
context:
space:
mode:
authorSeema Khowala <seemaj@nvidia.com>2016-09-15 17:37:31 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2016-10-14 11:11:20 -0400
commit94efd53ed1c8202b4b46af41ec8ab580774f4974 (patch)
tree8639358ff6be216c40755896b323ac9966107321 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent1029136eaa1c7c1cb9a9c8413af439fd741dc232 (diff)
gpu: nvgpu: fix zcull programming
There are eight tiles per map tile register and depending on how many tpcs are present, there is a chance that s/w will be accessing un-allocated memory for reading tile values from temp buffers. Bug 1735760 Change-Id: I5c0e09ec75099aaf6ad03dde964b9e93c2dc2408 Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: http://git-master/r/1221580 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com> Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com> Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c166
1 files changed, 119 insertions, 47 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 75f4379d..7f24747c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -4301,6 +4301,107 @@ void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config)
4301 } 4301 }
4302} 4302}
4303 4303
4304void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries,
4305 u32 *zcull_map_tiles)
4306{
4307 u32 val;
4308
4309 gk20a_dbg_fn("");
4310
4311 if (zcull_num_entries >= 8) {
4312 gk20a_dbg_fn("map0");
4313 val =
4314 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(
4315 zcull_map_tiles[0]) |
4316 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(
4317 zcull_map_tiles[1]) |
4318 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(
4319 zcull_map_tiles[2]) |
4320 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(
4321 zcull_map_tiles[3]) |
4322 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(
4323 zcull_map_tiles[4]) |
4324 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(
4325 zcull_map_tiles[5]) |
4326 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(
4327 zcull_map_tiles[6]) |
4328 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(
4329 zcull_map_tiles[7]);
4330
4331 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(), val);
4332 }
4333
4334 if (zcull_num_entries >= 16) {
4335 gk20a_dbg_fn("map1");
4336 val =
4337 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(
4338 zcull_map_tiles[8]) |
4339 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(
4340 zcull_map_tiles[9]) |
4341 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(
4342 zcull_map_tiles[10]) |
4343 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(
4344 zcull_map_tiles[11]) |
4345 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(
4346 zcull_map_tiles[12]) |
4347 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(
4348 zcull_map_tiles[13]) |
4349 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(
4350 zcull_map_tiles[14]) |
4351 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(
4352 zcull_map_tiles[15]);
4353
4354 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(), val);
4355 }
4356
4357 if (zcull_num_entries >= 24) {
4358 gk20a_dbg_fn("map2");
4359 val =
4360 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(
4361 zcull_map_tiles[16]) |
4362 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(
4363 zcull_map_tiles[17]) |
4364 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(
4365 zcull_map_tiles[18]) |
4366 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(
4367 zcull_map_tiles[19]) |
4368 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(
4369 zcull_map_tiles[20]) |
4370 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(
4371 zcull_map_tiles[21]) |
4372 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(
4373 zcull_map_tiles[22]) |
4374 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(
4375 zcull_map_tiles[23]);
4376
4377 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(), val);
4378 }
4379
4380 if (zcull_num_entries >= 32) {
4381 gk20a_dbg_fn("map3");
4382 val =
4383 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(
4384 zcull_map_tiles[24]) |
4385 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(
4386 zcull_map_tiles[25]) |
4387 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(
4388 zcull_map_tiles[26]) |
4389 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(
4390 zcull_map_tiles[27]) |
4391 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(
4392 zcull_map_tiles[28]) |
4393 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(
4394 zcull_map_tiles[29]) |
4395 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(
4396 zcull_map_tiles[30]) |
4397 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(
4398 zcull_map_tiles[31]);
4399
4400 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(), val);
4401 }
4402
4403}
4404
4304static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) 4405static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
4305{ 4406{
4306 u32 gpc_index, gpc_tpc_count, gpc_zcull_count; 4407 u32 gpc_index, gpc_tpc_count, gpc_zcull_count;
@@ -4310,25 +4411,31 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
4310 u32 offset; 4411 u32 offset;
4311 bool floorsweep = false; 4412 bool floorsweep = false;
4312 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 4413 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
4313 int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); 4414 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
4314 int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); 4415 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
4416 GPU_LIT_NUM_TPC_PER_GPC);
4417 u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc;
4315 4418
4316 if (!gr->map_tiles) 4419 if (!gr->map_tiles)
4317 return -1; 4420 return -1;
4318 4421
4319 zcull_map_tiles = kzalloc(num_gpcs * 4422 if (zcull_alloc_num % 8 != 0) {
4320 num_tpc_per_gpc * sizeof(u32), GFP_KERNEL); 4423 /* Total 8 fields per map reg i.e. tile_0 to tile_7*/
4424 zcull_alloc_num += (zcull_alloc_num % 8);
4425 }
4426 zcull_map_tiles = kzalloc(zcull_alloc_num *
4427 sizeof(u32), GFP_KERNEL);
4321 if (!zcull_map_tiles) { 4428 if (!zcull_map_tiles) {
4322 gk20a_err(dev_from_gk20a(g), 4429 gk20a_err(dev_from_gk20a(g),
4323 "failed to allocate zcull temp buffers"); 4430 "failed to allocate zcull map titles");
4324 return -ENOMEM; 4431 return -ENOMEM;
4325 } 4432 }
4326 zcull_bank_counters = kzalloc(num_gpcs * 4433 zcull_bank_counters = kzalloc(zcull_alloc_num *
4327 num_tpc_per_gpc * sizeof(u32), GFP_KERNEL); 4434 sizeof(u32), GFP_KERNEL);
4328 4435
4329 if (!zcull_bank_counters) { 4436 if (!zcull_bank_counters) {
4330 gk20a_err(dev_from_gk20a(g), 4437 gk20a_err(dev_from_gk20a(g),
4331 "failed to allocate zcull temp buffers"); 4438 "failed to allocate zcull bank counters");
4332 kfree(zcull_map_tiles); 4439 kfree(zcull_map_tiles);
4333 return -ENOMEM; 4440 return -ENOMEM;
4334 } 4441 }
@@ -4339,45 +4446,9 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
4339 zcull_bank_counters[gr->map_tiles[map_counter]]++; 4446 zcull_bank_counters[gr->map_tiles[map_counter]]++;
4340 } 4447 }
4341 4448
4342 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(), 4449 if (g->ops.gr.program_zcull_mapping)
4343 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(zcull_map_tiles[0]) | 4450 g->ops.gr.program_zcull_mapping(g, zcull_alloc_num,
4344 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(zcull_map_tiles[1]) | 4451 zcull_map_tiles);
4345 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(zcull_map_tiles[2]) |
4346 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(zcull_map_tiles[3]) |
4347 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(zcull_map_tiles[4]) |
4348 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(zcull_map_tiles[5]) |
4349 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(zcull_map_tiles[6]) |
4350 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(zcull_map_tiles[7]));
4351
4352 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(),
4353 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(zcull_map_tiles[8]) |
4354 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(zcull_map_tiles[9]) |
4355 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(zcull_map_tiles[10]) |
4356 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(zcull_map_tiles[11]) |
4357 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(zcull_map_tiles[12]) |
4358 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(zcull_map_tiles[13]) |
4359 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(zcull_map_tiles[14]) |
4360 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(zcull_map_tiles[15]));
4361
4362 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(),
4363 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(zcull_map_tiles[16]) |
4364 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(zcull_map_tiles[17]) |
4365 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(zcull_map_tiles[18]) |
4366 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(zcull_map_tiles[19]) |
4367 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(zcull_map_tiles[20]) |
4368 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(zcull_map_tiles[21]) |
4369 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(zcull_map_tiles[22]) |
4370 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(zcull_map_tiles[23]));
4371
4372 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(),
4373 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(zcull_map_tiles[24]) |
4374 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(zcull_map_tiles[25]) |
4375 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(zcull_map_tiles[26]) |
4376 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(zcull_map_tiles[27]) |
4377 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(zcull_map_tiles[28]) |
4378 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(zcull_map_tiles[29]) |
4379 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(zcull_map_tiles[30]) |
4380 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(zcull_map_tiles[31]));
4381 4452
4382 kfree(zcull_map_tiles); 4453 kfree(zcull_map_tiles);
4383 kfree(zcull_bank_counters); 4454 kfree(zcull_bank_counters);
@@ -9059,4 +9130,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
9059 gops->gr.split_ltc_broadcast_addr = 9130 gops->gr.split_ltc_broadcast_addr =
9060 gr_gk20a_split_ltc_broadcast_addr_stub; 9131 gr_gk20a_split_ltc_broadcast_addr_stub;
9061 gops->gr.setup_rop_mapping = gr_gk20a_setup_rop_mapping; 9132 gops->gr.setup_rop_mapping = gr_gk20a_setup_rop_mapping;
9133 gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping;
9062} 9134}