author     sujeet baranwal <sbaranwal@nvidia.com>    2015-08-20 20:04:44 -0400
committer  Terje Bergstrom <tbergstrom@nvidia.com>   2015-12-04 16:03:11 -0500
commit     397c6d44ed3ee6cc0c24fce7711bda4f0d6cd9bf (patch)
tree       4cf5477fd29605022291239e4f060a8030f793d0 /drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
parent     71c8d62657db7ef40a30b7504632d668f4e64bc6 (diff)
gpu: nvgpu: Wait for pause for SMs
The order of SM locking and register reads has been changed.
The warp-state register reads have also been moved into per-chip
functions, implemented for gk20a and gm20b.
Change-Id: Iaf720d088130f84c4b2ca318d9860194c07966e1
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Signed-off-by: ashutosh jain <ashutoshj@nvidia.com>
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/837236
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c')
-rw-r--r--    drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c    47
1 file changed, 15 insertions, 32 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 4f33c78f..e17e239b 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -30,7 +30,6 @@
 #include "hw_fb_gk20a.h"
 #include "hw_proj_gk20a.h"
 
-
 int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 {
 	struct gk20a *g;
@@ -389,64 +388,48 @@ static int nvgpu_gpu_ioctl_set_debug_mode(
 	return err;
 }
 
-static int nvgpu_gpu_ioctl_wait_for_pause(
-		struct gk20a *g,
+static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
 		struct nvgpu_gpu_wait_pause_args *args)
 {
-	int err = 0, gpc, tpc;
-	u32 sm_count, sm_id, size;
+	int err = 0;
 	struct warpstate *w_state;
 	struct gr_gk20a *gr = &g->gr;
-	u32 tpc_offset, gpc_offset, reg_offset, global_mask;
-	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
+	u32 gpc, tpc, sm_count, sm_id, size;
+	u32 global_mask;
 
 	sm_count = g->gr.gpc_count * g->gr.tpc_count;
 	size = sm_count * sizeof(struct warpstate);
 	w_state = kzalloc(size, GFP_KERNEL);
 
+	/* Wait for the SMs to reach full stop. This condition is:
+	 * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE)
+	 * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp
+	 *    masks.
+	 */
 	global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
 		gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
 		gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
 
 	mutex_lock(&g->dbg_sessions_lock);
 
+	/* Lock down all SMs */
 	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
 
 		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
 		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
 
-		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
-		gpc_offset = proj_gpc_stride_v() * gpc;
-		reg_offset = tpc_offset + gpc_offset;
-
-		/* Wait until all valid warps on the sm are paused. The valid warp mask
-		 * must be re-read with the paused mask because new warps may become
-		 * valid as the sm is pausing.
-		 */
-
 		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask);
+
 		if (err) {
 			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
 			goto end;
 		}
-
-		/* 64 bit read */
-		warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32;
-		warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset);
-
-		/* 64 bit read */
-		warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32;
-		warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset);
-
-		/* 64 bit read */
-		warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32;
-		warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset);
-
-		w_state[sm_id].valid_warps = warps_valid;
-		w_state[sm_id].trapped_warps = warps_trapped;
-		w_state[sm_id].paused_warps = warps_paused;
 	}
 
+	/* Read the warp status */
+	g->ops.gr.bpt_reg_info(g, w_state);
+
+	/* Copy to user space - pointed by "args->pwarpstate" */
 	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
 		gk20a_dbg_fn("copy_to_user failed!");
 		err = -EFAULT;
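For context, the sketch below shows one plausible shape for the per-chip hook that replaces the deleted inline reads: it rebuilds each 64-bit warp mask from a pair of 32-bit register reads, exactly as the removed lines above did, and fills the same struct warpstate fields. The function name gk20a_gr_bpt_reg_info and its registration under g->ops.gr are assumptions for illustration; the commit's actual gk20a and gm20b implementations live outside this file and are not shown in this diff.

/* Illustrative sketch only, assembled from the inline code this commit
 * deletes and the new g->ops.gr.bpt_reg_info(g, w_state) call site.
 * The hook name and its placement are assumptions, not the literal
 * per-chip code from the commit.
 */
static void gk20a_gr_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc, tpc, sm_id;

	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
		u32 tpc_offset, gpc_offset, reg_offset;
		u64 warps_valid, warps_paused, warps_trapped;

		gpc = gr->sm_to_cluster[sm_id].gpc_index;
		tpc = gr->sm_to_cluster[sm_id].tpc_index;

		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
		gpc_offset = proj_gpc_stride_v() * gpc;
		reg_offset = tpc_offset + gpc_offset;

		/* Each 64-bit mask is exposed as two adjacent 32-bit
		 * registers; the high word sits at offset +4 from the
		 * low word, as in the deleted code above.
		 */
		warps_valid = (u64)gk20a_readl(g,
			gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32;
		warps_valid |= gk20a_readl(g,
			gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset);

		warps_paused = (u64)gk20a_readl(g,
			gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32;
		warps_paused |= gk20a_readl(g,
			gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset);

		warps_trapped = (u64)gk20a_readl(g,
			gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32;
		warps_trapped |= gk20a_readl(g,
			gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset);

		w_state[sm_id].valid_warps = warps_valid;
		w_state[sm_id].trapped_warps = warps_trapped;
		w_state[sm_id].paused_warps = warps_paused;
	}
}

Routing the reads through a HAL pointer keeps nvgpu_gpu_ioctl_wait_for_pause() chip-agnostic: gm20b can register its own variant with a different register layout without touching ctrl_gk20a.c.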