author:    sujeet baranwal <sbaranwal@nvidia.com>   2015-08-20 20:04:44 -0400
committer: Terje Bergstrom <tbergstrom@nvidia.com>  2015-12-04 16:03:11 -0500
commit:    397c6d44ed3ee6cc0c24fce7711bda4f0d6cd9bf (patch)
tree:      4cf5477fd29605022291239e4f060a8030f793d0 /drivers/gpu/nvgpu/gk20a
parent:    71c8d62657db7ef40a30b7504632d668f4e64bc6 (diff)
gpu: nvgpu: Wait for pause for SMs

The order of SM locking and register reads has been changed, and the
warp-state register reads are now implemented as chip-specific
functions for gk20a and gm20b.

Change-Id: Iaf720d088130f84c4b2ca318d9860194c07966e1
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Signed-off-by: ashutosh jain <ashutoshj@nvidia.com>
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/837236
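In short, the ioctl now locks down every SM first and only afterwards reads the warp masks, through the new per-chip bpt_reg_info hook rather than inline gk20a register reads. A minimal sketch of the reordered flow, assuming only the names shown in the diff below (allocation, the dbg_sessions_lock, and the copy to user space are elided):

/* Sketch only -- mirrors the reordered flow in the diff below;
 * not a drop-in replacement for nvgpu_gpu_ioctl_wait_for_pause(). */
static int wait_for_pause_sketch(struct gk20a *g, struct warpstate *w_state)
{
	struct gr_gk20a *gr = &g->gr;
	u32 sm_id, gpc, tpc;
	u32 global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
		gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
		gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
	int err;

	/* Step 1: lock down all SMs before touching any warp mask. */
	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
		gpc = gr->sm_to_cluster[sm_id].gpc_index;
		tpc = gr->sm_to_cluster[sm_id].tpc_index;
		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask);
		if (err)
			return err;
	}

	/* Step 2: read valid/paused/trapped masks via the chip-specific op. */
	g->ops.gr.bpt_reg_info(g, w_state);
	return 0;
}

Reading the masks only after every SM is locked down sidesteps the re-read dance the old inline code needed while warps could still become valid mid-pause.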
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c  47
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h        2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c    59
3 files changed, 69 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 4f33c78f..e17e239b 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -30,7 +30,6 @@
 #include "hw_fb_gk20a.h"
 #include "hw_proj_gk20a.h"
 
-
 int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 {
 	struct gk20a *g;
@@ -389,64 +388,48 @@ static int nvgpu_gpu_ioctl_set_debug_mode(
 	return err;
 }
 
-static int nvgpu_gpu_ioctl_wait_for_pause(
-		struct gk20a *g,
+static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
 		struct nvgpu_gpu_wait_pause_args *args)
 {
-	int err = 0, gpc, tpc;
-	u32 sm_count, sm_id, size;
+	int err = 0;
 	struct warpstate *w_state;
 	struct gr_gk20a *gr = &g->gr;
-	u32 tpc_offset, gpc_offset, reg_offset, global_mask;
-	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
+	u32 gpc, tpc, sm_count, sm_id, size;
+	u32 global_mask;
 
 	sm_count = g->gr.gpc_count * g->gr.tpc_count;
 	size = sm_count * sizeof(struct warpstate);
 	w_state = kzalloc(size, GFP_KERNEL);
 
+	/* Wait for the SMs to reach full stop. This condition is:
+	 * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE)
+	 * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp
+	 *    masks.
+	 */
 	global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
 		gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
 		gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
 
 	mutex_lock(&g->dbg_sessions_lock);
 
+	/* Lock down all SMs */
 	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
 
 		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
 		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
 
-		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
-		gpc_offset = proj_gpc_stride_v() * gpc;
-		reg_offset = tpc_offset + gpc_offset;
-
-		/* Wait until all valid warps on the sm are paused. The valid warp mask
-		 * must be re-read with the paused mask because new warps may become
-		 * valid as the sm is pausing.
-		 */
-
 		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask);
+
 		if (err) {
 			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
 			goto end;
 		}
-
-		/* 64 bit read */
-		warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32;
-		warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset);
-
-		/* 64 bit read */
-		warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32;
-		warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset);
-
-		/* 64 bit read */
-		warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32;
-		warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset);
-
-		w_state[sm_id].valid_warps = warps_valid;
-		w_state[sm_id].trapped_warps = warps_trapped;
-		w_state[sm_id].paused_warps = warps_paused;
 	}
 
+	/* Read the warp status */
+	g->ops.gr.bpt_reg_info(g, w_state);
+
+	/* Copy to user space - pointed by "args->pwarpstate" */
 	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
 		gk20a_dbg_fn("copy_to_user failed!");
 		err = -EFAULT;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 51955a3a..47256e24 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -185,6 +185,8 @@ struct gpu_ops {
 			u32 expect_delay);
 		void (*init_cyclestats)(struct gk20a *g);
 		void (*enable_cde_in_fecs)(void *ctx_ptr);
+		void (*bpt_reg_info)(struct gk20a *g,
+			struct warpstate *w_state);
 	} gr;
 	const char *name;
 	struct {
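The new gr.bpt_reg_info member above is the per-chip hook for reading warp state. The gr_gk20a.c diff below wires up the gk20a implementation; by analogy, a gm20b hookup would presumably look like the following (gr_gm20b.c is outside this diffstat, which is limited to gk20a, so the gm20b names here are assumptions):

/* Hypothetical gm20b wiring, modeled on gk20a_init_gr_ops() below;
 * gr_gm20b.c is not part of this diff, so these names are assumed. */
void gm20b_init_gr(struct gpu_ops *gops)
{
	/* ... other gr ops ... */
	gops->gr.bpt_reg_info = gr_gm20b_bpt_reg_info;
}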
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 090f95a5..ef24e078 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -6973,8 +6973,8 @@ static u32 gr_gk20a_get_tpc_num(u32 addr)
 static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 		u32 global_esr_mask, bool check_errors)
 {
-	unsigned long end_jiffies = jiffies +
-		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
+	bool locked_down;
+	bool no_error_pending;
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
 	bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g);
 	u32 offset =
@@ -6991,10 +6991,10 @@ static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 			gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
 		u32 dbgr_status0 = gk20a_readl(g,
 			gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
-		bool locked_down =
+		locked_down =
 		    (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
 		     gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
-		bool no_error_pending =
+		no_error_pending =
 			check_errors &&
 			(gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) ==
 			 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) &&
@@ -7018,9 +7018,7 @@ static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
 
 		usleep_range(delay, delay * 2);
 		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
-
-	} while (time_before(jiffies, end_jiffies)
-			|| !tegra_platform_is_silicon());
+	} while (!locked_down);
 
 	gk20a_err(dev_from_gk20a(g),
 		"GPC%d TPC%d: timed out while trying to lock down SM",
@@ -7273,6 +7271,52 @@ static void gr_gk20a_init_cyclestats(struct gk20a *g)
 #endif
 }
 
+void gr_gk20a_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
+{
+	/* Check if we have at least one valid warp */
+	struct gr_gk20a *gr = &g->gr;
+	u32 gpc, tpc, sm_id;
+	u32 tpc_offset, gpc_offset, reg_offset;
+	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+		gpc_offset = proj_gpc_stride_v() * gpc;
+		reg_offset = tpc_offset + gpc_offset;
+
+		/* 64 bit read */
+		warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32;
+		warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset);
+
+
+		/* 64 bit read */
+		warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32;
+		warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset);
+
+		/* 64 bit read */
+		warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32;
+		warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset);
+
+		w_state[sm_id].valid_warps[0] = warps_valid;
+		w_state[sm_id].trapped_warps[0] = warps_trapped;
+		w_state[sm_id].paused_warps[0] = warps_paused;
+	}
+
+	/* Only for debug purpose */
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+		gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n",
+			sm_id, w_state[sm_id].valid_warps[0]);
+		gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n",
+			sm_id, w_state[sm_id].trapped_warps[0]);
+		gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n",
+			sm_id, w_state[sm_id].paused_warps[0]);
+	}
+}
+
+
 void gk20a_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
@@ -7324,4 +7368,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info;
 	gops->gr.wait_empty = gr_gk20a_wait_idle;
 	gops->gr.init_cyclestats = gr_gk20a_init_cyclestats;
+	gops->gr.bpt_reg_info = gr_gk20a_bpt_reg_info;
 }
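For reference, the warp masks read by gr_gk20a_bpt_reg_info() are 64-bit values exposed as a pair of 32-bit registers: the low word sits at the register offset and the high word at offset + 4. The three open-coded reads above all follow that pattern, which a small helper could capture (the helper is illustrative only, not part of the patch):

/* Illustrative only -- the driver open-codes this pattern above
 * instead of using a helper like this. */
static u64 gk20a_read_sm_mask64(struct gk20a *g, u32 reg, u32 reg_offset)
{
	u64 hi = (u64)gk20a_readl(g, reg + reg_offset + 4);
	u64 lo = gk20a_readl(g, reg + reg_offset);

	return (hi << 32) | lo;
}

/* e.g. warps_valid = gk20a_read_sm_mask64(g,
 *		gr_gpc0_tpc0_sm_warp_valid_mask_r(), reg_offset); */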