From 9891cb117e538f1ea5d19171a3c88422cdce7162 Mon Sep 17 00:00:00 2001
From: Seema Khowala
Date: Wed, 21 Jun 2017 21:28:10 -0700
Subject: gpu: nvgpu: add gr ops get_sm_hww_global_esr

Required for multiple SM support and t19x sm register address changes

JIRA GPUT19X-75

Change-Id: I437095cb8f8d2ba31b85594a7609532991441a37
Signed-off-by: Seema Khowala
Reviewed-on: https://git-master/r/1514040
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/gk20a.h    |  2 +
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 87 +++++++++++++++++++-------------------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h |  1 +
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c |  1 +
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c |  3 +-
 5 files changed, 50 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d45477fc..70b1ac5f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -293,6 +293,8 @@ struct gpu_ops {
 				bool *early_exit, bool *ignore_debugger);
 		u32 (*get_sm_hww_warp_esr)(struct gk20a *g,
 			u32 gpc, u32 tpc, u32 sm);
+		u32 (*get_sm_hww_global_esr)(struct gk20a *g,
+			u32 gpc, u32 tpc, u32 sm);
 		void (*get_esr_sm_sel)(struct gk20a *g, u32 gpc, u32 tpc,
 				u32 *esr_sm_sel);
 		int (*handle_sm_exception)(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 6fe330c0..b714b2e2 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5525,9 +5525,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 	int ret = 0;
 	bool do_warp_sync = false, early_exit = false, ignore_debugger = false;
 	bool disable_sm_exceptions = true;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+	bool sm_debugger_attached;
+	u32 global_esr, warp_esr;
 
 	/* these three interrupts don't require locking down the SM. They can
 	 * be handled by usermode clients as they aren't fatal. Additionally,
@@ -5537,27 +5537,26 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 	u32 global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f()
 			| gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f()
 			| gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
-	u32 global_esr, warp_esr;
-	bool sm_debugger_attached = g->ops.gr.sm_debugger_attached(g);
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 
-	global_esr = gk20a_readl(g,
-			gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+	sm_debugger_attached = g->ops.gr.sm_debugger_attached(g);
+
+	global_esr = g->ops.gr.get_sm_hww_global_esr(g, gpc, tpc, sm);
+	*hww_global_esr = global_esr;
 	warp_esr = g->ops.gr.get_sm_hww_warp_esr(g, gpc, tpc, sm);
 
 	if (!sm_debugger_attached) {
-		nvgpu_err(g, "sm hww global %08x warp %08x",
+		nvgpu_err(g, "sm hww global 0x%08x warp 0x%08x",
 			  global_esr, warp_esr);
 		return -EFAULT;
 	}
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-		"sm hww global %08x warp %08x", global_esr, warp_esr);
+		"sm hww global 0x%08x warp 0x%08x", global_esr, warp_esr);
 
 	gr_gk20a_elpg_protected_call(g,
 		g->ops.gr.record_sm_error_state(g, gpc, tpc));
-	*hww_global_esr = global_esr;
 
 	if (g->ops.gr.pre_process_sm_exception) {
 		ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc, sm,
@@ -5615,7 +5614,8 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 	}
 
 	if (ignore_debugger)
-		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "ignore_debugger set, skipping event posting");
+		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"ignore_debugger set, skipping event posting");
 	else
 		*post_event |= true;
 
@@ -5655,15 +5655,14 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		u32 *hww_global_esr)
 {
 	int ret = 0;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
 	u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r()
 			+ offset);
 	u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
 
+	/* check if an sm exception is pending */
 	if (gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(tpc_exception) ==
 			gr_gpc0_tpc0_tpccs_tpc_exception_sm_pending_v()) {
@@ -5685,6 +5684,11 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
 			ret = g->ops.gr.handle_sm_exception(g,
 				gpc, tpc, sm, post_event, fault_ch,
 				hww_global_esr);
+			/* clear the hwws; this also causes tpc and gpc
+			 * exceptions to be cleared
+			 */
+			gk20a_gr_clear_sm_hww(g, gpc, tpc, *hww_global_esr);
+
 		}
 	}
 
@@ -5704,12 +5708,10 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 		struct channel_gk20a *fault_ch, u32 *hww_global_esr)
 {
 	int ret = 0;
-	u32 gpc_offset, tpc_offset, gpc, tpc;
+	u32 gpc_offset, gpc, tpc;
 	struct gr_gk20a *gr = &g->gr;
 	u32 exception1 = gk20a_readl(g, gr_exception1_r());
-	u32 gpc_exception, global_esr;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 gpc_exception;
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
 
@@ -5720,7 +5722,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 				"GPC%d exception pending", gpc);
 
-		gpc_offset = gpc_stride * gpc;
+		gpc_offset = gk20a_gr_gpc_offset(g, gpc);
 
 		gpc_exception = gk20a_readl(g,
 				gr_gpc0_gpccs_gpc_exception_r() + gpc_offset);
@@ -5734,18 +5736,9 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 				"GPC%d: TPC%d exception pending", gpc, tpc);
 
-			tpc_offset = tpc_in_gpc_stride * tpc;
-
-			global_esr = gk20a_readl(g,
-					gr_gpc0_tpc0_sm_hww_global_esr_r() +
-					gpc_offset + tpc_offset);
-
 			ret = gk20a_gr_handle_tpc_exception(g, gpc, tpc,
 					post_event, fault_ch, hww_global_esr);
 
-			/* clear the hwws, also causes tpc and gpc
-			 * exceptions to be cleared */
-			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
 		}
 
 		/* Handle GCC exception */
@@ -7946,8 +7939,8 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 
 	/* wait for the sm to lock down */
 	do {
-		u32 global_esr = gk20a_readl(g,
-				gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+		u32 global_esr = g->ops.gr.get_sm_hww_global_esr(g,
+				gpc, tpc, sm);
 		dbgr_status0 = gk20a_readl(g,
 				gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
 
@@ -8440,28 +8433,25 @@ int gr_gk20a_resume_from_pause(struct gk20a *g)
 int gr_gk20a_clear_sm_errors(struct gk20a *g)
 {
 	int ret = 0;
-	u32 gpc_offset, tpc_offset, gpc, tpc;
+	u32 gpc, tpc, sm;
 	struct gr_gk20a *gr = &g->gr;
 	u32 global_esr;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
 
 	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
 
-		gpc_offset = gpc_stride * gpc;
-
 		/* check if any tpc has an exception */
 		for (tpc = 0; tpc < gr->tpc_count; tpc++) {
 
-			tpc_offset = tpc_in_gpc_stride * tpc;
+			for (sm = 0; sm < sm_per_tpc; sm++) {
+				global_esr = g->ops.gr.get_sm_hww_global_esr(g,
+						gpc, tpc, sm);
 
-			global_esr = gk20a_readl(g,
-					gr_gpc0_tpc0_sm_hww_global_esr_r() +
-					gpc_offset + tpc_offset);
-
-			/* clear the hwws, also causes tpc and gpc
-			 * exceptions to be cleared */
-			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
+				/* clearing the hwws also causes tpc and gpc
+				 * exceptions to be cleared
+				 */
+				gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
+			}
 		}
 	}
 
@@ -8498,3 +8488,14 @@ u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
 			gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
 	return hww_warp_esr;
 }
+
+u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm)
+{
+	u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
+
+	u32 hww_global_esr = gk20a_readl(g,
+			 gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+
+	return hww_global_esr;
+}
+
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 924410c3..3dbf5697 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -694,6 +694,7 @@ int gr_gk20a_get_ctx_id(struct gk20a *g, u32 *ctx_id);
 
 u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
+u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
 
 int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long duration_ms,
 		u32 expect_delay);
 
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index f3ba11b7..3a1d579a 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1635,4 +1635,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.resume_single_sm = gk20a_gr_resume_single_sm;
 	gops->gr.resume_all_sms = gk20a_gr_resume_all_sms;
 	gops->gr.get_sm_hww_warp_esr = gk20a_gr_get_sm_hww_warp_esr;
+	gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 64ec5e1a..27d609d1 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1833,7 +1833,8 @@ static int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
 	}
 
 	/* reset the HWW errors after locking down */
-	global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+	global_esr_copy = g->ops.gr.get_sm_hww_global_esr(g,
+			gpc, tpc, sm);
 	gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy);
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: HWWs cleared for gpc %d tpc %d\n",
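Note on the new op: gk20a_gr_get_sm_hww_global_esr() above computes its
register offset from gpc and tpc only and ignores its sm argument, which
is sufficient on chips with a single SM per TPC. The point of routing
every read through g->ops.gr.get_sm_hww_global_esr is that a t19x-style
chip with multiple SMs per TPC can fold the SM index into the offset as
well. A minimal sketch of such an override follows; the gv11b_* names
and the GPU_LIT_SM_PER_TPC_BASE_STRIDE litter value are illustrative
assumptions, not something this commit defines.

	/*
	 * Hypothetical override for a chip with more than one SM per TPC.
	 * The per-SM stride litter value and the function names are
	 * assumptions for illustration; only the op signature comes from
	 * this commit.
	 */
	static u32 gv11b_gr_sm_offset(struct gk20a *g, u32 sm)
	{
		/* assumed litter value: register stride between SMs in a TPC */
		u32 sm_stride = nvgpu_get_litter_value(g,
					GPU_LIT_SM_PER_TPC_BASE_STRIDE);

		return sm * sm_stride;
	}

	static u32 gv11b_gr_get_sm_hww_global_esr(struct gk20a *g,
			u32 gpc, u32 tpc, u32 sm)
	{
		/* rebase the broadcast gpc0/tpc0 register by gpc, tpc and sm */
		u32 offset = gk20a_gr_gpc_offset(g, gpc) +
				gk20a_gr_tpc_offset(g, tpc) +
				gv11b_gr_sm_offset(g, sm);

		return gk20a_readl(g,
				gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
	}

Such a chip's init_gr would then install the override exactly as gm20b
does above, e.g. gops->gr.get_sm_hww_global_esr =
gv11b_gr_get_sm_hww_global_esr;.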