Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')

 -rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 47
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h      |  2
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c   | 59

3 files changed, 69 insertions(+), 39 deletions(-)
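What the change does, as far as the diff shows: the wait-for-pause ioctl in ctrl_gk20a.c previously read the per-SM valid/paused/trapped warp masks inline; those reads now live behind a new gr HAL operation, bpt_reg_info, declared in gk20a.h and given a gk20a implementation (gr_gk20a_bpt_reg_info) in gr_gk20a.c, so other chips can supply their own register layout. The SM lock-down polling loop in gr_gk20a.c also drops its jiffies-based deadline in favor of polling the lock-down status flag.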
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 4f33c78f..e17e239b 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -30,7 +30,6 @@
 #include "hw_fb_gk20a.h"
 #include "hw_proj_gk20a.h"
 
-
 int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 {
 	struct gk20a *g;
@@ -389,64 +388,48 @@ static int nvgpu_gpu_ioctl_set_debug_mode(
 	return err;
 }
 
-static int nvgpu_gpu_ioctl_wait_for_pause(
-		struct gk20a *g,
+static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
 		struct nvgpu_gpu_wait_pause_args *args)
 {
-	int err = 0, gpc, tpc;
-	u32 sm_count, sm_id, size;
+	int err = 0;
 	struct warpstate *w_state;
 	struct gr_gk20a *gr = &g->gr;
-	u32 tpc_offset, gpc_offset, reg_offset, global_mask;
-	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
+	u32 gpc, tpc, sm_count, sm_id, size;
+	u32 global_mask;
 
 	sm_count = g->gr.gpc_count * g->gr.tpc_count;
 	size = sm_count * sizeof(struct warpstate);
 	w_state = kzalloc(size, GFP_KERNEL);
 
+	/* Wait for the SMs to reach full stop. This condition is:
+	 * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE)
+	 * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp
+	 *    masks.
+	 */
 	global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
 		gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
 		gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
 
 	mutex_lock(&g->dbg_sessions_lock);
 
+	/* Lock down all SMs */
 	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
 
 		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
 		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
 
-		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
-		gpc_offset = proj_gpc_stride_v() * gpc;
-		reg_offset = tpc_offset + gpc_offset;
-
-		/* Wait until all valid warps on the sm are paused. The valid warp mask
-		 * must be re-read with the paused mask because new warps may become
-		 * valid as the sm is pausing.
-		 */
-
 		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask);
+
 		if (err) {
 			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
 			goto end;
 		}
-
-		/* 64 bit read */
-		warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32;
-		warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset);
-
-		/* 64 bit read */
-		warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32;
-		warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset);
-
-		/* 64 bit read */
-		warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32;
-		warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset);
-
-		w_state[sm_id].valid_warps = warps_valid;
-		w_state[sm_id].trapped_warps = warps_trapped;
-		w_state[sm_id].paused_warps = warps_paused;
 	}
 
+	/* Read the warp status */
+	g->ops.gr.bpt_reg_info(g, w_state);
+
+	/* Copy to user space - pointed by "args->pwarpstate" */
 	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
 		gk20a_dbg_fn("copy_to_user failed!");
 		err = -EFAULT;
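One thing the reworked function still doesn't do is check the kzalloc() result: on allocation failure, w_state is NULL and is handed to g->ops.gr.bpt_reg_info(), which writes through it unconditionally. A minimal hardening sketch, as an observation rather than part of the patch:

	w_state = kzalloc(size, GFP_KERNEL);
	if (!w_state)
		return -ENOMEM;	/* bail out before taking dbg_sessions_lock */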
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 51955a3a..47256e24 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -185,6 +185,8 @@ struct gpu_ops {
 			u32 expect_delay);
 		void (*init_cyclestats)(struct gk20a *g);
 		void (*enable_cde_in_fecs)(void *ctx_ptr);
+		void (*bpt_reg_info)(struct gk20a *g,
+				struct warpstate *w_state);
 	} gr;
 	const char *name;
 	struct {
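These two added lines are the point of the refactor: the warp-state read becomes a per-chip hook on gpu_ops.gr. gk20a installs its default in gk20a_init_gr_ops() (last hunk below), and another chip's gr init can override it the same way. A sketch of such an override; the gm20b names are assumptions for illustration, not part of this patch:

	/* hypothetical chip-specific init, mirroring gk20a_init_gr_ops() */
	void gm20b_init_gr(struct gpu_ops *gops)
	{
		/* ... other gr hooks ... */
		gops->gr.bpt_reg_info = gr_gm20b_bpt_reg_info;
	}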
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 090f95a5..ef24e078 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -6973,8 +6973,8 @@ static u32 gr_gk20a_get_tpc_num(u32 addr)
 static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 		u32 global_esr_mask, bool check_errors)
 {
-	unsigned long end_jiffies = jiffies +
-		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+	bool locked_down;
+	bool no_error_pending;
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
 	bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g);
 	u32 offset =
@@ -6991,10 +6991,10 @@ static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 			gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
 		u32 dbgr_status0 = gk20a_readl(g,
 			gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
-		bool locked_down =
+		locked_down =
 		    (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
 		     gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
-		bool no_error_pending =
+		no_error_pending =
 			check_errors &&
 			(gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) ==
 			 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) &&
@@ -7018,9 +7018,7 @@ static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 
 		usleep_range(delay, delay * 2);
 		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
-
-	} while (time_before(jiffies, end_jiffies)
-			|| !tegra_platform_is_silicon());
+	} while (!locked_down);
 
 	gk20a_err(dev_from_gk20a(g),
 		"GPC%d TPC%d: timed out while trying to lock down SM",
@@ -7273,6 +7271,52 @@ static void gr_gk20a_init_cyclestats(struct gk20a *g)
 #endif
 }
 
+void gr_gk20a_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
+{
+	/* Check if we have at least one valid warp */
+	struct gr_gk20a *gr = &g->gr;
+	u32 gpc, tpc, sm_id;
+	u32 tpc_offset, gpc_offset, reg_offset;
+	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+		gpc_offset = proj_gpc_stride_v() * gpc;
+		reg_offset = tpc_offset + gpc_offset;
+
+		/* 64 bit read */
+		warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32;
+		warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset);
+
+
+		/* 64 bit read */
+		warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32;
+		warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset);
+
+		/* 64 bit read */
+		warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32;
+		warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset);
+
+		w_state[sm_id].valid_warps[0] = warps_valid;
+		w_state[sm_id].trapped_warps[0] = warps_trapped;
+		w_state[sm_id].paused_warps[0] = warps_paused;
+	}
+
+	/* Only for debug purpose */
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+		gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n",
+			sm_id, w_state[sm_id].valid_warps[0]);
+		gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n",
+			sm_id, w_state[sm_id].trapped_warps[0]);
+		gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n",
+			sm_id, w_state[sm_id].paused_warps[0]);
+	}
+}
+
+
 void gk20a_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
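The three mask reads in gr_gk20a_bpt_reg_info() repeat one idiom: each 64-bit warp mask is exposed as a pair of adjacent 32-bit registers, low word at reg_offset and high word 4 bytes above it, hence the two gk20a_readl() calls per mask. A hypothetical helper that would factor the pattern out (the name is an assumption, not part of the patch):

	static u64 gk20a_read_sm_mask64(struct gk20a *g, u32 reg, u32 reg_offset)
	{
		/* high word sits 4 bytes above the low word */
		u64 mask = (u64)gk20a_readl(g, reg + reg_offset + 4) << 32;

		mask |= gk20a_readl(g, reg + reg_offset);
		return mask;
	}

	/* e.g.: */
	warps_valid = gk20a_read_sm_mask64(g,
			gr_gpc0_tpc0_sm_warp_valid_mask_r(), reg_offset);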
@@ -7324,4 +7368,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info;
 	gops->gr.wait_empty = gr_gk20a_wait_idle;
 	gops->gr.init_cyclestats = gr_gk20a_init_cyclestats;
+	gops->gr.bpt_reg_info = gr_gk20a_bpt_reg_info;
 }