diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2017-03-23 14:03:15 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-03-29 12:15:59 -0400 |
commit | 4022b989aa2e91fe77ed52df49d45838f6d8b9bb (patch) | |
tree | 4c8240ac83887c21db902a255306c67041c4525c /drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |
parent | f04031e5e8837abb2be3feb0ee30e1af54de7845 (diff) |
gpu: nvgpu: Remove direct HW access from ctrl_gk20a.c
ctrl_gk20a.c had some direct accesses to hardware. These violate the
HAL rules, because we don't have per-GPU ctrl, and thus the code
cannot be made GPU independent.
Move all GR accesses to new GR HALs and use existing bus HAL for
accessing timer. Remove #includes of all hardware headers.
JIRA NVGPU-28
Change-Id: I57e67519f62e9bd6c3e725e1bef6e366190f5834
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1327001
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 178 |
1 file changed, 178 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 3df19a4e..172931d7 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -9291,6 +9291,178 @@ static void gr_gk20a_split_ltc_broadcast_addr_stub(struct gk20a *g, u32 addr, | |||
9291 | { | 9291 | { |
9292 | } | 9292 | } |
9293 | 9293 | ||
9294 | int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch) | ||
9295 | { | ||
9296 | int err = 0; | ||
9297 | u32 cache_ctrl, regval; | ||
9298 | struct nvgpu_dbg_gpu_reg_op ops; | ||
9299 | |||
9300 | ops.op = REGOP(READ_32); | ||
9301 | ops.type = REGOP(TYPE_GR_CTX); | ||
9302 | ops.status = REGOP(STATUS_SUCCESS); | ||
9303 | ops.value_hi = 0; | ||
9304 | ops.and_n_mask_lo = 0; | ||
9305 | ops.and_n_mask_hi = 0; | ||
9306 | ops.offset = gr_pri_gpc0_gcc_dbg_r(); | ||
9307 | |||
9308 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1); | ||
9309 | if (err) { | ||
9310 | gk20a_err(dev_from_gk20a(g), "Failed to read register"); | ||
9311 | return err; | ||
9312 | } | ||
9313 | |||
9314 | regval = ops.value_lo; | ||
9315 | |||
9316 | ops.op = REGOP(WRITE_32); | ||
9317 | ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1); | ||
9318 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0); | ||
9319 | if (err) { | ||
9320 | gk20a_err(dev_from_gk20a(g), "Failed to write register"); | ||
9321 | return err; | ||
9322 | } | ||
9323 | |||
9324 | ops.op = REGOP(READ_32); | ||
9325 | ops.offset = gr_pri_gpc0_tpc0_sm_cache_control_r(); | ||
9326 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1); | ||
9327 | if (err) { | ||
9328 | gk20a_err(dev_from_gk20a(g), "Failed to read register"); | ||
9329 | return err; | ||
9330 | } | ||
9331 | |||
9332 | cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r()); | ||
9333 | cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1); | ||
9334 | gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl); | ||
9335 | |||
9336 | return 0; | ||
9337 | } | ||
9338 | |||
9339 | int gr_gk20a_trigger_suspend(struct gk20a *g) | ||
9340 | { | ||
9341 | int err = 0; | ||
9342 | u32 dbgr_control0; | ||
9343 | |||
9344 | /* assert stop trigger. uniformity assumption: all SMs will have | ||
9345 | * the same state in dbg_control0. */ | ||
9346 | dbgr_control0 = | ||
9347 | gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r()); | ||
9348 | dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f(); | ||
9349 | |||
9350 | /* broadcast write */ | ||
9351 | gk20a_writel(g, | ||
9352 | gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); | ||
9353 | |||
9354 | return err; | ||
9355 | } | ||
9356 | |||
9357 | int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state) | ||
9358 | { | ||
9359 | int err = 0; | ||
9360 | struct gr_gk20a *gr = &g->gr; | ||
9361 | u32 gpc, tpc, sm_id; | ||
9362 | u32 global_mask; | ||
9363 | |||
9364 | /* Wait for the SMs to reach full stop. This condition is: | ||
9365 | * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE) | ||
9366 | * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp | ||
9367 | * masks. | ||
9368 | */ | ||
9369 | global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() | | ||
9370 | gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() | | ||
9371 | gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(); | ||
9372 | |||
9373 | /* Lock down all SMs */ | ||
9374 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
9375 | |||
9376 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
9377 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
9378 | |||
9379 | err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false); | ||
9380 | |||
9381 | if (err) { | ||
9382 | gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n"); | ||
9383 | return err; | ||
9384 | } | ||
9385 | } | ||
9386 | |||
9387 | /* Read the warp status */ | ||
9388 | g->ops.gr.bpt_reg_info(g, w_state); | ||
9389 | |||
9390 | return 0; | ||
9391 | } | ||
9392 | |||
/*
 * Resume all SMs after a debugger/breakpoint pause.
 *
 * Clears the broadcast pause mask, re-enables forwarding of SM interrupts
 * at the TPC level, then pulses the stop/run triggers via
 * gk20a_resume_all_sms().
 *
 * Always returns 0.
 */
int gr_gk20a_resume_from_pause(struct gk20a *g)
{
	/* Empty pause mask: tell the GPU we want everyone to resume. */
	gk20a_writel(g, gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0);

	/* Explicitly re-enable SM interrupt forwarding on any resume. */
	gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
			gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());

	/* Write 0 to the stop trigger, then 1 to the run trigger. */
	gk20a_resume_all_sms(g);

	return 0;
}
9411 | |||
9412 | int gr_gk20a_clear_sm_errors(struct gk20a *g) | ||
9413 | { | ||
9414 | int ret = 0; | ||
9415 | u32 gpc_offset, tpc_offset, gpc, tpc; | ||
9416 | struct gr_gk20a *gr = &g->gr; | ||
9417 | u32 global_esr; | ||
9418 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
9419 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
9420 | |||
9421 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { | ||
9422 | |||
9423 | gpc_offset = gpc_stride * gpc; | ||
9424 | |||
9425 | /* check if any tpc has an exception */ | ||
9426 | for (tpc = 0; tpc < gr->tpc_count; tpc++) { | ||
9427 | |||
9428 | tpc_offset = tpc_in_gpc_stride * tpc; | ||
9429 | |||
9430 | global_esr = gk20a_readl(g, | ||
9431 | gr_gpc0_tpc0_sm_hww_global_esr_r() + | ||
9432 | gpc_offset + tpc_offset); | ||
9433 | |||
9434 | /* clear the hwws, also causes tpc and gpc | ||
9435 | * exceptions to be cleared */ | ||
9436 | gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr); | ||
9437 | } | ||
9438 | } | ||
9439 | |||
9440 | return ret; | ||
9441 | } | ||
9442 | |||
9443 | u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g) | ||
9444 | { | ||
9445 | struct gr_gk20a *gr = &g->gr; | ||
9446 | u32 sm_id, tpc_exception_en = 0; | ||
9447 | u32 offset, regval, tpc_offset, gpc_offset; | ||
9448 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
9449 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
9450 | |||
9451 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
9452 | |||
9453 | tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index; | ||
9454 | gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index; | ||
9455 | offset = tpc_offset + gpc_offset; | ||
9456 | |||
9457 | regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + | ||
9458 | offset); | ||
9459 | /* Each bit represents corresponding enablement state, bit 0 corrsponds to SM0 */ | ||
9460 | tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id; | ||
9461 | } | ||
9462 | |||
9463 | return tpc_exception_en; | ||
9464 | } | ||
9465 | |||
9294 | void gk20a_init_gr_ops(struct gpu_ops *gops) | 9466 | void gk20a_init_gr_ops(struct gpu_ops *gops) |
9295 | { | 9467 | { |
9296 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; | 9468 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; |
@@ -9376,4 +9548,10 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
9376 | gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr; | 9548 | gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr; |
9377 | gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; | 9549 | gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; |
9378 | gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; | 9550 | gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; |
9551 | gops->gr.inval_icache = gr_gk20a_inval_icache; | ||
9552 | gops->gr.trigger_suspend = gr_gk20a_trigger_suspend; | ||
9553 | gops->gr.wait_for_pause = gr_gk20a_wait_for_pause; | ||
9554 | gops->gr.resume_from_pause = gr_gk20a_resume_from_pause; | ||
9555 | gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors; | ||
9556 | gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions; | ||
9379 | } | 9557 | } |