Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 187
 1 file changed, 133 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 452560d8..b3fc8ae1 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -79,6 +79,10 @@ static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c);
 static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 					   struct channel_gk20a *c);
 
+/* sm lock down */
+static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
+		u32 global_esr_mask, bool check_errors);
+
 void gk20a_fecs_dump_falcon_stats(struct gk20a *g)
 {
 	int i;
@@ -5365,13 +5369,9 @@ unlock:
 	return chid;
 }
 
-static int gk20a_gr_lock_down_sm(struct gk20a *g,
+int gk20a_gr_lock_down_sm(struct gk20a *g,
 		u32 gpc, u32 tpc, u32 global_esr_mask)
 {
-	unsigned long end_jiffies = jiffies +
-		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
-	u32 delay = GR_IDLE_CHECK_DEFAULT;
-	bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g);
 	u32 offset =
 		proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc;
 	u32 dbgr_control0;
@@ -5386,55 +5386,8 @@ static int gk20a_gr_lock_down_sm(struct gk20a *g,
 	gk20a_writel(g,
 		gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
 
-	/* wait for the sm to lock down */
-	do {
-		u32 global_esr = gk20a_readl(g,
-				gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
-		u32 warp_esr = gk20a_readl(g,
-				gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
-		u32 dbgr_status0 = gk20a_readl(g,
-				gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
-		bool locked_down =
-			(gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
-			 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
-		bool error_pending =
-			(gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) !=
-			 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) ||
-			((global_esr & ~global_esr_mask) != 0);
-
-		if (locked_down || !error_pending) {
-			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
-					"GPC%d TPC%d: locked down SM", gpc, tpc);
-
-			/* de-assert stop trigger */
-			dbgr_control0 &= ~gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f();
-			gk20a_writel(g,
-				gr_gpc0_tpc0_sm_dbgr_control0_r() + offset,
-				dbgr_control0);
-
-			return 0;
-		}
-
-		/* if an mmu fault is pending and mmu debug mode is not
-		 * enabled, the sm will never lock down. */
-		if (!mmu_debug_mode_enabled && gk20a_fifo_mmu_fault_pending(g)) {
-			gk20a_err(dev_from_gk20a(g),
-				"GPC%d TPC%d: mmu fault pending,"
-				" sm will never lock down!", gpc, tpc);
-			return -EFAULT;
-		}
-
-		usleep_range(delay, delay * 2);
-		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
-
-	} while (time_before(jiffies, end_jiffies)
-			|| !tegra_platform_is_silicon());
-
-	gk20a_err(dev_from_gk20a(g),
-		"GPC%d TPC%d: timed out while trying to lock down SM",
-		gpc, tpc);
-
-	return -EAGAIN;
+	return gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, global_esr_mask,
+			true);
 }
 
 bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
@@ -7198,6 +7151,131 @@ static u32 gr_gk20a_get_tpc_num(u32 addr)
 	return 0;
 }
 
+static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
+		u32 global_esr_mask, bool check_errors)
+{
+	unsigned long end_jiffies = jiffies +
+		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+	u32 delay = GR_IDLE_CHECK_DEFAULT;
+	bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g);
+	u32 offset =
+		proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc;
+
+	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+		"GPC%d TPC%d: locking down SM", gpc, tpc);
+
+	/* wait for the sm to lock down */
+	do {
+		u32 global_esr = gk20a_readl(g,
+				gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+		u32 warp_esr = gk20a_readl(g,
+				gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
+		u32 dbgr_status0 = gk20a_readl(g,
+				gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
+		bool locked_down =
+			(gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
+			 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
+		bool no_error_pending =
+			check_errors &&
+			(gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) ==
+			 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) &&
+			((global_esr & ~global_esr_mask) == 0);
+
+		if (locked_down || no_error_pending) {
+			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
+				"GPC%d TPC%d: locked down SM", gpc, tpc);
+			return 0;
+		}
+
+		/* if an mmu fault is pending and mmu debug mode is not
+		 * enabled, the sm will never lock down. */
+		if (!mmu_debug_mode_enabled &&
+		     gk20a_fifo_mmu_fault_pending(g)) {
+			gk20a_err(dev_from_gk20a(g),
+				"GPC%d TPC%d: mmu fault pending,"
+				" sm will never lock down!", gpc, tpc);
+			return -EFAULT;
+		}
+
+		usleep_range(delay, delay * 2);
+		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
+
+	} while (time_before(jiffies, end_jiffies)
+			|| !tegra_platform_is_silicon());
+
+	gk20a_err(dev_from_gk20a(g),
+		"GPC%d TPC%d: timed out while trying to lock down SM",
+		gpc, tpc);
+
+	return -EAGAIN;
+}
+
+void gk20a_suspend_all_sms(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 gpc, tpc;
+	int err;
+	u32 dbgr_control0;
+
+	/* if an SM debugger isn't attached, skip suspend */
+	if (!gk20a_gr_sm_debugger_attached(g)) {
+		gk20a_err(dev_from_gk20a(g), "SM debugger not attached, "
+				"skipping suspend!\n");
+		return;
+	}
+
+	/* assert stop trigger. uniformity assumption: all SMs will have
+	 * the same state in dbg_control0. */
+	dbgr_control0 =
+		gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
+	dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
+
+	/* broadcast write */
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
+
+	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+		for (tpc = 0; tpc < gr->tpc_count; tpc++) {
+			err =
+			 gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, 0, false);
+			if (err) {
+				gk20a_err(dev_from_gk20a(g),
+					"SuspendAllSms failed\n");
+				return;
+			}
+		}
+	}
+}
+
+void gk20a_resume_all_sms(struct gk20a *g)
+{
+	u32 dbgr_control0;
+	/*
+	 * The following requires some clarification. Despite the fact that both
+	 * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their
+	 * names, only one is actually a trigger, and that is the STOP_TRIGGER.
+	 * Merely writing a 1(_TASK) to the RUN_TRIGGER is not sufficient to
+	 * resume the gpu - the _STOP_TRIGGER must explicitly be set to 0
+	 * (_DISABLE) as well.
+
+	 * Advice from the arch group: Disable the stop trigger first, as a
+	 * separate operation, in order to ensure that the trigger has taken
+	 * effect, before enabling the run trigger.
+	 */
+
+	/*De-assert stop trigger */
+	dbgr_control0 =
+		gk20a_readl(g, gr_gpcs_tpcs_sm_dbgr_control0_r());
+	dbgr_control0 &= ~gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
+
+	/* Run trigger */
+	dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_enable_f();
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
+}
+
 void gk20a_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
@@ -7232,3 +7310,4 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr;
 	gops->gr.get_tpc_num = gr_gk20a_get_tpc_num;
 }
+
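
The change exports gk20a_gr_lock_down_sm(), factors the lock-down polling loop into gk20a_gr_wait_for_sm_lock_down(), and adds the broadcast gk20a_suspend_all_sms()/gk20a_resume_all_sms() pair. Below is a minimal sketch of how a caller outside gr_gk20a.c might use the new suspend/resume helpers; the wrapper function and its error handling are illustrative assumptions and are not part of this commit.

/*
 * Illustrative only: a hypothetical debugger-side caller using the
 * helpers added by this change. Only the gk20a_* calls shown here are
 * taken from the diff; everything else is a sketch.
 */
static int example_dbg_halt_and_resume_sms(struct gk20a *g)
{
	/* Suspend is only meaningful while an SM debugger is attached. */
	if (!gk20a_gr_sm_debugger_attached(g))
		return -EINVAL;

	/* Broadcast the stop trigger and wait for every GPC/TPC SM to
	 * report locked_down in dbgr_status0. */
	gk20a_suspend_all_sms(g);

	/* ... inspect or modify SM state here while the SMs are halted ... */

	/* De-assert the stop trigger, then issue the run trigger. */
	gk20a_resume_all_sms(g);

	return 0;
}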