author		Terje Bergstrom <tbergstrom@nvidia.com>	2017-03-23 14:03:15 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-03-29 12:15:59 -0400
commit		4022b989aa2e91fe77ed52df49d45838f6d8b9bb
tree		4c8240ac83887c21db902a255306c67041c4525c /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent		f04031e5e8837abb2be3feb0ee30e1af54de7845
gpu: nvgpu: Remove direct HW access from ctrl_gk20a.c
ctrl_gk20a.c had some direct accesses to hardware. These violate the
HAL rules, because we don't have per-GPU ctrl, and thus the code cannot
be made GPU independent.

Move all GR accesses to new GR HALs and use existing bus HAL for
accessing timer. Remove #includes of all hardware headers.

JIRA NVGPU-28

Change-Id: I57e67519f62e9bd6c3e725e1bef6e366190f5834
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1327001
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c	178
1 file changed, 178 insertions(+), 0 deletions(-)
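
The pattern behind this change: common code such as ctrl_gk20a.c must not touch GR registers with gk20a_readl()/gk20a_writel() directly, because the register layout is chip specific; instead it dispatches through the per-GPU ops table that gk20a_init_gr_ops() fills in below. A minimal caller-side sketch, assuming a simplified suspend path (the wrapper name is hypothetical and is not taken from the actual ctrl_gk20a.c change):

/* Illustrative sketch only -- not the actual ctrl_gk20a.c code. */
static int ctrl_trigger_suspend_sketch(struct gk20a *g)
{
	/*
	 * Before: chip-specific register writes in common code, e.g.
	 * gk20a_writel(g, gr_gpcs_tpcs_sm_dbgr_control0_r(), ...),
	 * which violates the HAL rules called out in the commit message.
	 */

	/*
	 * After: go through the GR HAL registered in gk20a_init_gr_ops(),
	 * so each GPU family can supply its own implementation.
	 */
	return g->ops.gr.trigger_suspend(g);
}
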
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 3df19a4e..172931d7 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -9291,6 +9291,178 @@ static void gr_gk20a_split_ltc_broadcast_addr_stub(struct gk20a *g, u32 addr,
 {
 }
 
+int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch)
+{
+	int err = 0;
+	u32 cache_ctrl, regval;
+	struct nvgpu_dbg_gpu_reg_op ops;
+
+	ops.op = REGOP(READ_32);
+	ops.type = REGOP(TYPE_GR_CTX);
+	ops.status = REGOP(STATUS_SUCCESS);
+	ops.value_hi = 0;
+	ops.and_n_mask_lo = 0;
+	ops.and_n_mask_hi = 0;
+	ops.offset = gr_pri_gpc0_gcc_dbg_r();
+
+	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "Failed to read register");
+		return err;
+	}
+
+	regval = ops.value_lo;
+
+	ops.op = REGOP(WRITE_32);
+	ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
+	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "Failed to write register");
+		return err;
+	}
+
+	ops.op = REGOP(READ_32);
+	ops.offset = gr_pri_gpc0_tpc0_sm_cache_control_r();
+	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "Failed to read register");
+		return err;
+	}
+
+	cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
+	cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
+	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
+
+	return 0;
+}
+
+int gr_gk20a_trigger_suspend(struct gk20a *g)
+{
+	int err = 0;
+	u32 dbgr_control0;
+
+	/* assert stop trigger. uniformity assumption: all SMs will have
+	 * the same state in dbg_control0. */
+	dbgr_control0 =
+		gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
+	dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
+
+	/* broadcast write */
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
+
+	return err;
+}
+
+int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
+{
+	int err = 0;
+	struct gr_gk20a *gr = &g->gr;
+	u32 gpc, tpc, sm_id;
+	u32 global_mask;
+
+	/* Wait for the SMs to reach full stop. This condition is:
+	 * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE)
+	 * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp
+	 * masks.
+	 */
+	global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
+		      gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
+		      gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
+
+	/* Lock down all SMs */
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false);
+
+		if (err) {
+			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
+			return err;
+		}
+	}
+
+	/* Read the warp status */
+	g->ops.gr.bpt_reg_info(g, w_state);
+
+	return 0;
+}
+
+int gr_gk20a_resume_from_pause(struct gk20a *g)
+{
+	int err = 0;
+
+	/* Clear the pause mask to tell the GPU we want to resume everyone */
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0);
+
+	/* explicitly re-enable forwarding of SM interrupts upon any resume */
+	gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
+		gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
+
+	/* Now resume all sms, write a 0 to the stop trigger
+	 * then a 1 to the run trigger */
+	gk20a_resume_all_sms(g);
+
+	return err;
+}
+
+int gr_gk20a_clear_sm_errors(struct gk20a *g)
+{
+	int ret = 0;
+	u32 gpc_offset, tpc_offset, gpc, tpc;
+	struct gr_gk20a *gr = &g->gr;
+	u32 global_esr;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+
+	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+
+		gpc_offset = gpc_stride * gpc;
+
+		/* check if any tpc has an exception */
+		for (tpc = 0; tpc < gr->tpc_count; tpc++) {
+
+			tpc_offset = tpc_in_gpc_stride * tpc;
+
+			global_esr = gk20a_readl(g,
+					gr_gpc0_tpc0_sm_hww_global_esr_r() +
+					gpc_offset + tpc_offset);
+
+			/* clear the hwws, also causes tpc and gpc
+			 * exceptions to be cleared */
+			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
+		}
+	}
+
+	return ret;
+}
+
+u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 sm_id, tpc_exception_en = 0;
+	u32 offset, regval, tpc_offset, gpc_offset;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index;
+		gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index;
+		offset = tpc_offset + gpc_offset;
+
+		regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
+				offset);
+		/* Each bit represents corresponding enablement state, bit 0 corresponds to SM0 */
+		tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
+	}
+
+	return tpc_exception_en;
+}
+
 void gk20a_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
@@ -9376,4 +9548,10 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr;
 	gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr;
 	gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode;
+	gops->gr.inval_icache = gr_gk20a_inval_icache;
+	gops->gr.trigger_suspend = gr_gk20a_trigger_suspend;
+	gops->gr.wait_for_pause = gr_gk20a_wait_for_pause;
+	gops->gr.resume_from_pause = gr_gk20a_resume_from_pause;
+	gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors;
+	gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions;
 }
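
The newly registered ops compose into the usual debugger pause/resume sequence. A hedged usage sketch, assuming the caller already holds a struct gk20a *g and a w_state array with one entry per SM (g->gr.no_of_sm); the helper name is hypothetical and not part of this commit:

/* Hypothetical helper: pause all SMs, sample warp state, then resume. */
static int sketch_pause_sample_resume(struct gk20a *g, struct warpstate *w_state)
{
	int err;

	/* Assert the stop trigger on all SMs (broadcast write). */
	err = g->ops.gr.trigger_suspend(g);
	if (err)
		return err;

	/* Lock down every SM and read the warp status back into w_state. */
	err = g->ops.gr.wait_for_pause(g, w_state);
	if (err)
		return err;

	/* Clear the pause mask, re-enable SM interrupts and resume all SMs. */
	return g->ops.gr.resume_from_pause(g);
}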