diff options
author | Seema Khowala <seemaj@nvidia.com> | 2017-06-22 14:53:04 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-07-06 15:04:43 -0400 |
commit | 0852c9f1aba1654e380ccdd13cd0540fbb5a8ab0 (patch) | |
tree | 7f5a797567b0c76f89b8e8241f794b432129d8bc /drivers | |
parent | 4728761b6cd002404b9ccd8b318cc9f772962efb (diff) |
gpu: nvgpu: add sm lock_down gr ops
Add lock_down_sm and wait_for_sm_lock_down gr ops
Required to support multiple SMs and t19x SM register
address changes.
JIRA GPUT19X-75
Change-Id: I529babde51d9b2143fe3740a4f67c582b7eb404b
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master/r/1514042
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 55 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 2 |
4 files changed, 37 insertions, 32 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index a0a67332..483cdd6e 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -296,6 +296,10 @@ struct gpu_ops { | |||
296 | u32 (*get_sm_hww_global_esr)(struct gk20a *g, | 296 | u32 (*get_sm_hww_global_esr)(struct gk20a *g, |
297 | u32 gpc, u32 tpc, u32 sm); | 297 | u32 gpc, u32 tpc, u32 sm); |
298 | u32 (*get_sm_no_lock_down_hww_global_esr_mask)(struct gk20a *g); | 298 | u32 (*get_sm_no_lock_down_hww_global_esr_mask)(struct gk20a *g); |
299 | int (*lock_down_sm)(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, | ||
300 | u32 global_esr_mask, bool check_errors); | ||
301 | int (*wait_for_sm_lock_down)(struct gk20a *g, u32 gpc, u32 tpc, | ||
302 | u32 sm, u32 global_esr_mask, bool check_errors); | ||
299 | void (*get_esr_sm_sel)(struct gk20a *g, u32 gpc, u32 tpc, | 303 | void (*get_esr_sm_sel)(struct gk20a *g, u32 gpc, u32 tpc, |
300 | u32 *esr_sm_sel); | 304 | u32 *esr_sm_sel); |
301 | int (*handle_sm_exception)(struct gk20a *g, | 305 | int (*handle_sm_exception)(struct gk20a *g, |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index d325f794..d2c77c93 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -5467,16 +5467,14 @@ unlock: | |||
5467 | } | 5467 | } |
5468 | 5468 | ||
5469 | int gk20a_gr_lock_down_sm(struct gk20a *g, | 5469 | int gk20a_gr_lock_down_sm(struct gk20a *g, |
5470 | u32 gpc, u32 tpc, u32 global_esr_mask, | 5470 | u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask, |
5471 | bool check_errors) | 5471 | bool check_errors) |
5472 | { | 5472 | { |
5473 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 5473 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); |
5474 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
5475 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
5476 | u32 dbgr_control0; | 5474 | u32 dbgr_control0; |
5477 | 5475 | ||
5478 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5476 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
5479 | "GPC%d TPC%d: locking down SM", gpc, tpc); | 5477 | "GPC%d TPC%d SM%d: assert stop trigger", gpc, tpc, sm); |
5480 | 5478 | ||
5481 | /* assert stop trigger */ | 5479 | /* assert stop trigger */ |
5482 | dbgr_control0 = | 5480 | dbgr_control0 = |
@@ -5485,7 +5483,7 @@ int gk20a_gr_lock_down_sm(struct gk20a *g, | |||
5485 | gk20a_writel(g, | 5483 | gk20a_writel(g, |
5486 | gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0); | 5484 | gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0); |
5487 | 5485 | ||
5488 | return gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, global_esr_mask, | 5486 | return g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, global_esr_mask, |
5489 | check_errors); | 5487 | check_errors); |
5490 | } | 5488 | } |
5491 | 5489 | ||
@@ -5598,7 +5596,8 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, | |||
5598 | } | 5596 | } |
5599 | 5597 | ||
5600 | if (do_warp_sync) { | 5598 | if (do_warp_sync) { |
5601 | ret = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, true); | 5599 | ret = g->ops.gr.lock_down_sm(g, gpc, tpc, sm, |
5600 | global_mask, true); | ||
5602 | if (ret) { | 5601 | if (ret) { |
5603 | nvgpu_err(g, "sm did not lock down!"); | 5602 | nvgpu_err(g, "sm did not lock down!"); |
5604 | return ret; | 5603 | return ret; |
@@ -7906,25 +7905,21 @@ void gk20a_init_gr(struct gk20a *g) | |||
7906 | nvgpu_cond_init(&g->gr.init_wq); | 7905 | nvgpu_cond_init(&g->gr.init_wq); |
7907 | } | 7906 | } |
7908 | 7907 | ||
7909 | int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | 7908 | int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, |
7910 | u32 global_esr_mask, bool check_errors) | 7909 | u32 global_esr_mask, bool check_errors) |
7911 | { | 7910 | { |
7912 | bool locked_down; | 7911 | bool locked_down; |
7913 | bool no_error_pending; | 7912 | bool no_error_pending; |
7914 | u32 delay = GR_IDLE_CHECK_DEFAULT; | 7913 | u32 delay = GR_IDLE_CHECK_DEFAULT; |
7915 | bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g); | 7914 | bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g); |
7916 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 7915 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc); |
7917 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
7918 | u32 offset = | ||
7919 | gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
7920 | u32 dbgr_status0 = 0, dbgr_control0 = 0; | 7916 | u32 dbgr_status0 = 0, dbgr_control0 = 0; |
7921 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | 7917 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; |
7922 | struct nvgpu_timeout timeout; | 7918 | struct nvgpu_timeout timeout; |
7923 | u32 warp_esr; | 7919 | u32 warp_esr; |
7924 | u32 sm = 0; | ||
7925 | 7920 | ||
7926 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 7921 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
7927 | "GPC%d TPC%d: locking down SM", gpc, tpc); | 7922 | "GPC%d TPC%d SM%d: locking down SM", gpc, tpc, sm); |
7928 | 7923 | ||
7929 | nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), | 7924 | nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), |
7930 | NVGPU_TIMER_CPU_TIMER); | 7925 | NVGPU_TIMER_CPU_TIMER); |
@@ -7949,7 +7944,8 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | |||
7949 | 7944 | ||
7950 | if (locked_down || no_error_pending) { | 7945 | if (locked_down || no_error_pending) { |
7951 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 7946 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
7952 | "GPC%d TPC%d: locked down SM", gpc, tpc); | 7947 | "GPC%d TPC%d SM%d: locked down SM", |
7948 | gpc, tpc, sm); | ||
7953 | return 0; | 7949 | return 0; |
7954 | } | 7950 | } |
7955 | 7951 | ||
@@ -7959,7 +7955,7 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | |||
7959 | (g->ops.mm.mmu_fault_pending(g))) { | 7955 | (g->ops.mm.mmu_fault_pending(g))) { |
7960 | nvgpu_err(g, | 7956 | nvgpu_err(g, |
7961 | "GPC%d TPC%d: mmu fault pending," | 7957 | "GPC%d TPC%d: mmu fault pending," |
7962 | " sm will never lock down!", gpc, tpc); | 7958 | " SM%d will never lock down!", gpc, tpc, sm); |
7963 | return -EFAULT; | 7959 | return -EFAULT; |
7964 | } | 7960 | } |
7965 | 7961 | ||
@@ -8017,7 +8013,7 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g, | |||
8017 | gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, | 8013 | gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, |
8018 | dbgr_control0); | 8014 | dbgr_control0); |
8019 | 8015 | ||
8020 | err = gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, | 8016 | err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, |
8021 | global_esr_mask, check_errors); | 8017 | global_esr_mask, check_errors); |
8022 | if (err) { | 8018 | if (err) { |
8023 | nvgpu_err(g, | 8019 | nvgpu_err(g, |
@@ -8030,9 +8026,10 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g, | |||
8030 | u32 global_esr_mask, bool check_errors) | 8026 | u32 global_esr_mask, bool check_errors) |
8031 | { | 8027 | { |
8032 | struct gr_gk20a *gr = &g->gr; | 8028 | struct gr_gk20a *gr = &g->gr; |
8033 | u32 gpc, tpc; | 8029 | u32 gpc, tpc, sm; |
8034 | int err; | 8030 | int err; |
8035 | u32 dbgr_control0; | 8031 | u32 dbgr_control0; |
8032 | u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); | ||
8036 | 8033 | ||
8037 | /* if an SM debugger isn't attached, skip suspend */ | 8034 | /* if an SM debugger isn't attached, skip suspend */ |
8038 | if (!g->ops.gr.sm_debugger_attached(g)) { | 8035 | if (!g->ops.gr.sm_debugger_attached(g)) { |
@@ -8055,13 +8052,14 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g, | |||
8055 | 8052 | ||
8056 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { | 8053 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { |
8057 | for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { | 8054 | for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { |
8058 | err = | 8055 | for (sm = 0; sm < sm_per_tpc; sm++) { |
8059 | gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, | 8056 | err = g->ops.gr.wait_for_sm_lock_down(g, |
8057 | gpc, tpc, sm, | ||
8060 | global_esr_mask, check_errors); | 8058 | global_esr_mask, check_errors); |
8061 | if (err) { | 8059 | if (err) { |
8062 | nvgpu_err(g, | 8060 | nvgpu_err(g, "SuspendAllSms failed"); |
8063 | "SuspendAllSms failed"); | 8061 | return; |
8064 | return; | 8062 | } |
8065 | } | 8063 | } |
8066 | } | 8064 | } |
8067 | } | 8065 | } |
@@ -8371,7 +8369,7 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state) | |||
8371 | { | 8369 | { |
8372 | int err = 0; | 8370 | int err = 0; |
8373 | struct gr_gk20a *gr = &g->gr; | 8371 | struct gr_gk20a *gr = &g->gr; |
8374 | u32 gpc, tpc, sm_id; | 8372 | u32 gpc, tpc, sm, sm_id; |
8375 | u32 global_mask; | 8373 | u32 global_mask; |
8376 | 8374 | ||
8377 | /* Wait for the SMs to reach full stop. This condition is: | 8375 | /* Wait for the SMs to reach full stop. This condition is: |
@@ -8386,9 +8384,10 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state) | |||
8386 | 8384 | ||
8387 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | 8385 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; |
8388 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | 8386 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; |
8387 | sm = g->gr.sm_to_cluster[sm_id].sm_index; | ||
8389 | 8388 | ||
8390 | err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false); | 8389 | err = g->ops.gr.lock_down_sm(g, gpc, tpc, sm, |
8391 | 8390 | global_mask, false); | |
8392 | if (err) { | 8391 | if (err) { |
8393 | nvgpu_err(g, "sm did not lock down!"); | 8392 | nvgpu_err(g, "sm did not lock down!"); |
8394 | return err; | 8393 | return err; |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 8230ba72..77db5cf6 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -622,9 +622,6 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g, | |||
622 | void gk20a_gr_suspend_all_sms(struct gk20a *g, | 622 | void gk20a_gr_suspend_all_sms(struct gk20a *g, |
623 | u32 global_esr_mask, bool check_errors); | 623 | u32 global_esr_mask, bool check_errors); |
624 | u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index); | 624 | u32 gr_gk20a_get_tpc_count(struct gr_gk20a *gr, u32 gpc_index); |
625 | int gk20a_gr_lock_down_sm(struct gk20a *g, | ||
626 | u32 gpc, u32 tpc, u32 global_esr_mask, | ||
627 | bool check_errors); | ||
628 | int gr_gk20a_set_sm_debug_mode(struct gk20a *g, | 625 | int gr_gk20a_set_sm_debug_mode(struct gk20a *g, |
629 | struct channel_gk20a *ch, u64 sms, bool enable); | 626 | struct channel_gk20a *ch, u64 sms, bool enable); |
630 | bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch); | 627 | bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch); |
@@ -681,7 +678,10 @@ static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g) | |||
681 | 678 | ||
682 | int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch, | 679 | int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch, |
683 | struct gr_gk20a_isr_data *isr_data); | 680 | struct gr_gk20a_isr_data *isr_data); |
684 | int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | 681 | int gk20a_gr_lock_down_sm(struct gk20a *g, |
682 | u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask, | ||
683 | bool check_errors); | ||
684 | int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, | ||
685 | u32 global_esr_mask, bool check_errors); | 685 | u32 global_esr_mask, bool check_errors); |
686 | void gk20a_gr_clear_sm_hww(struct gk20a *g, | 686 | void gk20a_gr_clear_sm_hww(struct gk20a *g, |
687 | u32 gpc, u32 tpc, u32 global_esr); | 687 | u32 gpc, u32 tpc, u32 global_esr); |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index fecd487f..333f0340 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -1638,4 +1638,6 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1638 | gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr; | 1638 | gops->gr.get_sm_hww_global_esr = gk20a_gr_get_sm_hww_global_esr; |
1639 | gops->gr.get_sm_no_lock_down_hww_global_esr_mask = | 1639 | gops->gr.get_sm_no_lock_down_hww_global_esr_mask = |
1640 | gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask; | 1640 | gk20a_gr_get_sm_no_lock_down_hww_global_esr_mask; |
1641 | gops->gr.lock_down_sm = gk20a_gr_lock_down_sm; | ||
1642 | gops->gr.wait_for_sm_lock_down = gk20a_gr_wait_for_sm_lock_down; | ||
1641 | } | 1643 | } |