diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 166 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 1 |
4 files changed, 124 insertions, 47 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index a76798d1..933ab4a9 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -305,6 +305,8 @@ struct gpu_ops { | |||
305 | void (*program_active_tpc_counts)(struct gk20a *g, u32 gpc); | 305 | void (*program_active_tpc_counts)(struct gk20a *g, u32 gpc); |
306 | int (*setup_rop_mapping)(struct gk20a *g, struct gr_gk20a *gr); | 306 | int (*setup_rop_mapping)(struct gk20a *g, struct gr_gk20a *gr); |
307 | int (*init_sw_veid_bundle)(struct gk20a *g); | 307 | int (*init_sw_veid_bundle)(struct gk20a *g); |
308 | void (*program_zcull_mapping)(struct gk20a *g, | ||
309 | u32 zcull_alloc_num, u32 *zcull_map_tiles); | ||
308 | } gr; | 310 | } gr; |
309 | const char *name; | 311 | const char *name; |
310 | struct { | 312 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 75f4379d..7f24747c 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -4301,6 +4301,107 @@ void gr_gk20a_init_cg_mode(struct gk20a *g, u32 cgmode, u32 mode_config) | |||
4301 | } | 4301 | } |
4302 | } | 4302 | } |
4303 | 4303 | ||
4304 | void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, | ||
4305 | u32 *zcull_map_tiles) | ||
4306 | { | ||
4307 | u32 val; | ||
4308 | |||
4309 | gk20a_dbg_fn(""); | ||
4310 | |||
4311 | if (zcull_num_entries >= 8) { | ||
4312 | gk20a_dbg_fn("map0"); | ||
4313 | val = | ||
4314 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f( | ||
4315 | zcull_map_tiles[0]) | | ||
4316 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f( | ||
4317 | zcull_map_tiles[1]) | | ||
4318 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f( | ||
4319 | zcull_map_tiles[2]) | | ||
4320 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f( | ||
4321 | zcull_map_tiles[3]) | | ||
4322 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f( | ||
4323 | zcull_map_tiles[4]) | | ||
4324 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f( | ||
4325 | zcull_map_tiles[5]) | | ||
4326 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f( | ||
4327 | zcull_map_tiles[6]) | | ||
4328 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f( | ||
4329 | zcull_map_tiles[7]); | ||
4330 | |||
4331 | gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(), val); | ||
4332 | } | ||
4333 | |||
4334 | if (zcull_num_entries >= 16) { | ||
4335 | gk20a_dbg_fn("map1"); | ||
4336 | val = | ||
4337 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f( | ||
4338 | zcull_map_tiles[8]) | | ||
4339 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f( | ||
4340 | zcull_map_tiles[9]) | | ||
4341 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f( | ||
4342 | zcull_map_tiles[10]) | | ||
4343 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f( | ||
4344 | zcull_map_tiles[11]) | | ||
4345 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f( | ||
4346 | zcull_map_tiles[12]) | | ||
4347 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f( | ||
4348 | zcull_map_tiles[13]) | | ||
4349 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f( | ||
4350 | zcull_map_tiles[14]) | | ||
4351 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f( | ||
4352 | zcull_map_tiles[15]); | ||
4353 | |||
4354 | gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(), val); | ||
4355 | } | ||
4356 | |||
4357 | if (zcull_num_entries >= 24) { | ||
4358 | gk20a_dbg_fn("map2"); | ||
4359 | val = | ||
4360 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f( | ||
4361 | zcull_map_tiles[16]) | | ||
4362 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f( | ||
4363 | zcull_map_tiles[17]) | | ||
4364 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f( | ||
4365 | zcull_map_tiles[18]) | | ||
4366 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f( | ||
4367 | zcull_map_tiles[19]) | | ||
4368 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f( | ||
4369 | zcull_map_tiles[20]) | | ||
4370 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f( | ||
4371 | zcull_map_tiles[21]) | | ||
4372 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f( | ||
4373 | zcull_map_tiles[22]) | | ||
4374 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f( | ||
4375 | zcull_map_tiles[23]); | ||
4376 | |||
4377 | gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(), val); | ||
4378 | } | ||
4379 | |||
4380 | if (zcull_num_entries >= 32) { | ||
4381 | gk20a_dbg_fn("map3"); | ||
4382 | val = | ||
4383 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f( | ||
4384 | zcull_map_tiles[24]) | | ||
4385 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f( | ||
4386 | zcull_map_tiles[25]) | | ||
4387 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f( | ||
4388 | zcull_map_tiles[26]) | | ||
4389 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f( | ||
4390 | zcull_map_tiles[27]) | | ||
4391 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f( | ||
4392 | zcull_map_tiles[28]) | | ||
4393 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f( | ||
4394 | zcull_map_tiles[29]) | | ||
4395 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f( | ||
4396 | zcull_map_tiles[30]) | | ||
4397 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f( | ||
4398 | zcull_map_tiles[31]); | ||
4399 | |||
4400 | gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(), val); | ||
4401 | } | ||
4402 | |||
4403 | } | ||
4404 | |||
4304 | static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) | 4405 | static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) |
4305 | { | 4406 | { |
4306 | u32 gpc_index, gpc_tpc_count, gpc_zcull_count; | 4407 | u32 gpc_index, gpc_tpc_count, gpc_zcull_count; |
@@ -4310,25 +4411,31 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) | |||
4310 | u32 offset; | 4411 | u32 offset; |
4311 | bool floorsweep = false; | 4412 | bool floorsweep = false; |
4312 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 4413 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
4313 | int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); | 4414 | u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); |
4314 | int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | 4415 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, |
4416 | GPU_LIT_NUM_TPC_PER_GPC); | ||
4417 | u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc; | ||
4315 | 4418 | ||
4316 | if (!gr->map_tiles) | 4419 | if (!gr->map_tiles) |
4317 | return -1; | 4420 | return -1; |
4318 | 4421 | ||
4319 | zcull_map_tiles = kzalloc(num_gpcs * | 4422 | if (zcull_alloc_num % 8 != 0) { |
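4320 | num_tpc_per_gpc * sizeof(u32), GFP_KERNEL); | 4423 | /* Each map register packs 8 tile fields (tile_0..tile_7), so the entry count is padded here. NOTE(review): adding (n % 8) does not always yield a multiple of 8 (e.g. 6 becomes 12); 8 - (n % 8) would -- confirm intent. */ |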
4424 | zcull_alloc_num += (zcull_alloc_num % 8); | ||
4425 | } | ||
4426 | zcull_map_tiles = kzalloc(zcull_alloc_num * | ||
4427 | sizeof(u32), GFP_KERNEL); | ||
4321 | if (!zcull_map_tiles) { | 4428 | if (!zcull_map_tiles) { |
4322 | gk20a_err(dev_from_gk20a(g), | 4429 | gk20a_err(dev_from_gk20a(g), |
4323 | "failed to allocate zcull temp buffers"); | 4430 | "failed to allocate zcull map titles"); |
4324 | return -ENOMEM; | 4431 | return -ENOMEM; |
4325 | } | 4432 | } |
4326 | zcull_bank_counters = kzalloc(num_gpcs * | 4433 | zcull_bank_counters = kzalloc(zcull_alloc_num * |
4327 | num_tpc_per_gpc * sizeof(u32), GFP_KERNEL); | 4434 | sizeof(u32), GFP_KERNEL); |
4328 | 4435 | ||
4329 | if (!zcull_bank_counters) { | 4436 | if (!zcull_bank_counters) { |
4330 | gk20a_err(dev_from_gk20a(g), | 4437 | gk20a_err(dev_from_gk20a(g), |
4331 | "failed to allocate zcull temp buffers"); | 4438 | "failed to allocate zcull bank counters"); |
4332 | kfree(zcull_map_tiles); | 4439 | kfree(zcull_map_tiles); |
4333 | return -ENOMEM; | 4440 | return -ENOMEM; |
4334 | } | 4441 | } |
@@ -4339,45 +4446,9 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) | |||
4339 | zcull_bank_counters[gr->map_tiles[map_counter]]++; | 4446 | zcull_bank_counters[gr->map_tiles[map_counter]]++; |
4340 | } | 4447 | } |
4341 | 4448 | ||
4342 | gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(), | 4449 | if (g->ops.gr.program_zcull_mapping) |
4343 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(zcull_map_tiles[0]) | | 4450 | g->ops.gr.program_zcull_mapping(g, zcull_alloc_num, |
4344 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(zcull_map_tiles[1]) | | 4451 | zcull_map_tiles); |
4345 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(zcull_map_tiles[2]) | | ||
4346 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(zcull_map_tiles[3]) | | ||
4347 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(zcull_map_tiles[4]) | | ||
4348 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(zcull_map_tiles[5]) | | ||
4349 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(zcull_map_tiles[6]) | | ||
4350 | gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(zcull_map_tiles[7])); | ||
4351 | |||
4352 | gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(), | ||
4353 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(zcull_map_tiles[8]) | | ||
4354 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(zcull_map_tiles[9]) | | ||
4355 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(zcull_map_tiles[10]) | | ||
4356 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(zcull_map_tiles[11]) | | ||
4357 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(zcull_map_tiles[12]) | | ||
4358 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(zcull_map_tiles[13]) | | ||
4359 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(zcull_map_tiles[14]) | | ||
4360 | gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(zcull_map_tiles[15])); | ||
4361 | |||
4362 | gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(), | ||
4363 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(zcull_map_tiles[16]) | | ||
4364 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(zcull_map_tiles[17]) | | ||
4365 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(zcull_map_tiles[18]) | | ||
4366 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(zcull_map_tiles[19]) | | ||
4367 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(zcull_map_tiles[20]) | | ||
4368 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(zcull_map_tiles[21]) | | ||
4369 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(zcull_map_tiles[22]) | | ||
4370 | gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(zcull_map_tiles[23])); | ||
4371 | |||
4372 | gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(), | ||
4373 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(zcull_map_tiles[24]) | | ||
4374 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(zcull_map_tiles[25]) | | ||
4375 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(zcull_map_tiles[26]) | | ||
4376 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(zcull_map_tiles[27]) | | ||
4377 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(zcull_map_tiles[28]) | | ||
4378 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(zcull_map_tiles[29]) | | ||
4379 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(zcull_map_tiles[30]) | | ||
4380 | gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(zcull_map_tiles[31])); | ||
4381 | 4452 | ||
4382 | kfree(zcull_map_tiles); | 4453 | kfree(zcull_map_tiles); |
4383 | kfree(zcull_bank_counters); | 4454 | kfree(zcull_bank_counters); |
@@ -9059,4 +9130,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
9059 | gops->gr.split_ltc_broadcast_addr = | 9130 | gops->gr.split_ltc_broadcast_addr = |
9060 | gr_gk20a_split_ltc_broadcast_addr_stub; | 9131 | gr_gk20a_split_ltc_broadcast_addr_stub; |
9061 | gops->gr.setup_rop_mapping = gr_gk20a_setup_rop_mapping; | 9132 | gops->gr.setup_rop_mapping = gr_gk20a_setup_rop_mapping; |
9133 | gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping; | ||
9062 | } | 9134 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index df6a3f3c..8c08459e 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -470,6 +470,8 @@ int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, | |||
470 | struct channel_gk20a *c, u64 zcull_va, u32 mode); | 470 | struct channel_gk20a *c, u64 zcull_va, u32 mode); |
471 | int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, | 471 | int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, |
472 | struct gr_zcull_info *zcull_params); | 472 | struct gr_zcull_info *zcull_params); |
473 | void gr_gk20a_program_zcull_mapping(struct gk20a *g, u32 zcull_num_entries, | ||
474 | u32 *zcull_map_tiles); | ||
473 | /* zbc */ | 475 | /* zbc */ |
474 | int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr, | 476 | int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr, |
475 | struct zbc_entry *zbc_val); | 477 | struct zbc_entry *zbc_val); |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index c2aa64ba..275224e7 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -1566,4 +1566,5 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1566 | gops->gr.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr; | 1566 | gops->gr.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr; |
1567 | gops->gr.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr; | 1567 | gops->gr.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr; |
1568 | gops->gr.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr; | 1568 | gops->gr.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr; |
1569 | gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping; | ||
1569 | } | 1570 | } |