summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu
diff options
context:
space:
mode:
authorSeema Khowala <seemaj@nvidia.com>2017-06-22 14:53:04 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-07-06 15:04:43 -0400
commit0852c9f1aba1654e380ccdd13cd0540fbb5a8ab0 (patch)
tree7f5a797567b0c76f89b8e8241f794b432129d8bc /drivers/gpu/nvgpu
parent4728761b6cd002404b9ccd8b318cc9f772962efb (diff)
gpu: nvgpu: add sm lock_down gr ops
Add lock_down_sm and wait_for_sm_lock_down gr ops.

Required to support multiple SM and t19x SM register address changes.

JIRA GPUT19X-75

Change-Id: I529babde51d9b2143fe3740a4f67c582b7eb404b
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master/r/1514042
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h4
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c55
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h8
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c2
4 files changed, 37 insertions, 32 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a0a67332..483cdd6e 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -296,6 +296,10 @@ struct gpu_ops {
296 u32 (*get_sm_hww_global_esr)(struct gk20a *g, 296 u32 (*get_sm_hww_global_esr)(struct gk20a *g,
297 u32 gpc, u32 tpc, u32 sm); 297 u32 gpc, u32 tpc, u32 sm);
298 u32 (*get_sm_no_lock_down_hww_global_esr_mask)(struct gk20a *g); 298 u32 (*get_sm_no_lock_down_hww_global_esr_mask)(struct gk20a *g);
299 int (*lock_down_sm)(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
300 u32 global_esr_mask, bool check_errors);
301 int (*wait_for_sm_lock_down)(struct gk20a *g, u32 gpc, u32 tpc,
302 u32 sm, u32 global_esr_mask, bool check_errors);
299 void (*get_esr_sm_sel)(struct gk20a *g, u32 gpc, u32 tpc, 303 void (*get_esr_sm_sel)(struct gk20a *g, u32 gpc, u32 tpc,
300 u32 *esr_sm_sel); 304 u32 *esr_sm_sel);
301 int (*handle_sm_exception)(struct gk20a *g, 305 int (*handle_sm_exception)(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index d325f794..d2c77c93 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5467,16 +5467,14 @@ unlock:
5467} 5467}
5468 5468
5469int gk20a_gr_lock_down_sm(struct gk20a *g, 5469int gk20a_gr_lock_down_sm(struct gk20a *g,
5470 u32 gpc, u32 tpc, u32 global_esr_mask, 5470 u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
5471 bool check_errors) 5471 bool check_errors)
5472{ 5472{
5473 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 5473 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
5474 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
5475 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
5476 u32 dbgr_control0; 5474 u32 dbgr_control0;
5477 5475
5478 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, 5476 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
5479 "GPC%d TPC%d: locking down SM", gpc, tpc); 5477 "GPC%d TPC%d SM%d: assert stop trigger", gpc, tpc, sm);
5480 5478
5481 /* assert stop trigger */ 5479 /* assert stop trigger */
5482 dbgr_control0 = 5480 dbgr_control0 =
@@ -5485,7 +5483,7 @@ int gk20a_gr_lock_down_sm(struct gk20a *g,
5485 gk20a_writel(g, 5483 gk20a_writel(g,
5486 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0); 5484 gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
5487 5485
5488 return gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, global_esr_mask, 5486 return g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, global_esr_mask,
5489 check_errors); 5487 check_errors);
5490} 5488}
5491 5489
@@ -5598,7 +5596,8 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
5598 } 5596 }
5599 5597
5600 if (do_warp_sync) { 5598 if (do_warp_sync) {
5601 ret = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, true); 5599 ret = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
5600 global_mask, true);
5602 if (ret) { 5601 if (ret) {
5603 nvgpu_err(g, "sm did not lock down!"); 5602 nvgpu_err(g, "sm did not lock down!");
5604 return ret; 5603 return ret;
@@ -7906,25 +7905,21 @@ void gk20a_init_gr(struct gk20a *g)
7906 nvgpu_cond_init(&g->gr.init_wq); 7905 nvgpu_cond_init(&g->gr.init_wq);
7907} 7906}
7908 7907
7909int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, 7908int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
7910 u32 global_esr_mask, bool check_errors) 7909 u32 global_esr_mask, bool check_errors)
7911{ 7910{
7912 bool locked_down; 7911 bool locked_down;
7913 bool no_error_pending; 7912 bool no_error_pending;
7914 u32 delay = GR_IDLE_CHECK_DEFAULT; 7913 u32 delay = GR_IDLE_CHECK_DEFAULT;
7915 bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g); 7914 bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g);
7916 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 7915 u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc);
7917 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
7918 u32 offset =
7919 gpc_stride * gpc + tpc_in_gpc_stride * tpc;
7920 u32 dbgr_status0 = 0, dbgr_control0 = 0; 7916 u32 dbgr_status0 = 0, dbgr_control0 = 0;
7921 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; 7917 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
7922 struct nvgpu_timeout timeout; 7918 struct nvgpu_timeout timeout;
7923 u32 warp_esr; 7919 u32 warp_esr;
7924 u32 sm = 0;
7925 7920
7926 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, 7921 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
7927 "GPC%d TPC%d: locking down SM", gpc, tpc); 7922 "GPC%d TPC%d SM%d: locking down SM", gpc, tpc, sm);
7928 7923
7929 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), 7924 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
7930 NVGPU_TIMER_CPU_TIMER); 7925 NVGPU_TIMER_CPU_TIMER);
@@ -7949,7 +7944,8 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
7949 7944
7950 if (locked_down || no_error_pending) { 7945 if (locked_down || no_error_pending) {
7951 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, 7946 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
7952 "GPC%d TPC%d: locked down SM", gpc, tpc); 7947 "GPC%d TPC%d SM%d: locked down SM",
7948 gpc, tpc, sm);
7953 return 0; 7949 return 0;
7954 } 7950 }
7955 7951
@@ -7959,7 +7955,7 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
7959 (g->ops.mm.mmu_fault_pending(g))) { 7955 (g->ops.mm.mmu_fault_pending(g))) {
7960 nvgpu_err(g, 7956 nvgpu_err(g,
7961 "GPC%d TPC%d: mmu fault pending," 7957 "GPC%d TPC%d: mmu fault pending,"
7962 " sm will never lock down!", gpc, tpc); 7958 " SM%d will never lock down!", gpc, tpc, sm);
7963 return -EFAULT; 7959 return -EFAULT;
7964 } 7960 }
7965 7961
@@ -8017,7 +8013,7 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
8017 gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, 8013 gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset,
8018 dbgr_control0); 8014 dbgr_control0);
8019 8015
8020 err = gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, 8016 err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm,
8021 global_esr_mask, check_errors); 8017 global_esr_mask, check_errors);
8022 if (err) { 8018 if (err) {
8023 nvgpu_err(g, 8019 nvgpu_err(g,
@@ -8030,9 +8026,10 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
8030 u32 global_esr_mask, bool check_errors) 8026 u32 global_esr_mask, bool check_errors)
8031{ 8027{
8032 struct gr_gk20a *gr = &g->gr; 8028 struct gr_gk20a *gr = &g->gr;
8033 u32 gpc, tpc; 8029 u32 gpc, tpc, sm;
8034 int err; 8030 int err;
8035 u32 dbgr_control0; 8031 u32 dbgr_control0;
8032 u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
8036 8033
8037 /* if an SM debugger isn't attached, skip suspend */ 8034 /* if an SM debugger isn't attached, skip suspend */
8038 if (!g->ops.gr.sm_debugger_attached(g)) { 8035 if (!g->ops.gr.sm_debugger_attached(g)) {
@@ -8055,13 +8052,14 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
8055 8052
8056 for (gpc = 0; gpc < gr->gpc_count; gpc++) { 8053 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
8057 for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { 8054 for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) {
8058 err = 8055 for (sm = 0; sm < sm_per_tpc; sm++) {
8059 gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, 8056 err = g->ops.gr.wait_for_sm_lock_down(g,
8057 gpc, tpc, sm,
8060 global_esr_mask, check_errors); 8058 global_esr_mask, check_errors);
8061 if (err) { 8059 if (err) {
8062 nvgpu_err(g, 8060 nvgpu_err(g, "SuspendAllSms failed");
8063 "SuspendAllSms failed"); 8061 return;
8064 return; 8062 }
8065 } 8063 }
8066 } 8064 }
8067 } 8065 }
@@ -8371,7 +8369,7 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
8371{ 8369{
8372 int err = 0; 8370 int err = 0;
8373 struct gr_gk20a *gr = &g->gr; 8371 struct gr_gk20a *gr = &g->gr;
8374 u32 gpc, tpc, sm_id; 8372 u32 gpc, tpc, sm, sm_id;
8375 u32 global_mask; 8373 u32 global_mask;
8376 8374
8377 /* Wait for the SMs to reach full stop. This condition is: 8375 /* Wait for the SMs to reach full stop. This condition is:
@@ -8386,9 +8384,10 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
8386 8384
8387 gpc = g->gr.sm_to_cluster[sm_id].gpc_index; 8385 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
8388 tpc = g->gr.sm_to_cluster[sm_id].tpc_index; 8386 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
8387 sm = g->gr.sm_to_cluster[sm_id].sm_index;
8389 8388
8390 err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false); 8389 err = g->ops.gr.lock_down_sm(g, gpc, tpc, sm,
8391 8390 global_mask, false);
8392 if (err) { 8391 if (err) {
8393 nvgpu_err(g, "sm did not lock down!"); 8392 nvgpu_err(g, "sm did not lock down!");
8394 return err; 8393 return err;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 8230ba72..77db5cf6 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -622,9 +622,6 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g,
622void gk20a_gr_suspend_all_sms(struct gk20a *g, 622void gk20a_gr_suspend_all_sms(struct gk20a *g,
623 u32 global_esr_mask, bool check_errors); 623 u32 global_esr_mask, bool check_errors);
624u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index); 624u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index);
625int gk20a_gr_lock_down_sm(struct gk20a *g,
626 u32 gpc, u32 tpc, u32 global_esr_mask,
627 bool check_errors);
628int gr_gk20a_set_sm_debug_mode(struct gk20a *g, 625int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
629 struct channel_gk20a *ch, u64 sms, bool enable); 626 struct channel_gk20a *ch, u64 sms, bool enable);
630bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch); 627bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
@@ -681,7 +678,10 @@ static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
681 678
682int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch, 679int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
683 struct gr_gk20a_isr_data *isr_data); 680 struct gr_gk20a_isr_data *isr_data);
684int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, 681int gk20a_gr_lock_down_sm(struct gk20a *g,
682 u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
683 bool check_errors);
684int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
685 u32 global_esr_mask, bool check_errors); 685 u32 global_esr_mask, bool check_errors);
686void gk20a_gr_clear_sm_hww(struct gk20a *g, 686void gk20a_gr_clear_sm_hww(struct gk20a *g,
687 u32 gpc, u32 tpc, u32 global_esr); 687 u32 gpc, u32 tpc, u32 global_esr);
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index fecd487f..333f0340 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1638,4 +1638,6 @@ void gm20b_init_gr(struct gpu_ops *gops)
1638 gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr; 1638 gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr;
1639 gops->gr.get_sm_no_lock_down_hww_global_esr_mask = 1639 gops->gr.get_sm_no_lock_down_hww_global_esr_mask =
1640 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask; 1640 gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask;
1641 gops->gr.lock_down_sm = gk20a_gr_lock_down_sm;
1642 gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down;
1641} 1643}