diff options
author | Cory Perry <cperry@nvidia.com> | 2017-03-07 12:32:53 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-03-14 14:46:52 -0400 |
commit | de568db9dee599fa27cdc2ead88186099fff3c3b (patch) | |
tree | e7f448c7b79aece79e4a98aef334f17d8fedca40 | |
parent | 403874fa75dbb00e974a8d0f88b6e92be01ba42e (diff) |
gpu: nvgpu: fix suspending all SMs
In gk20a_suspend_all_sms(), we currently loop
over all GPCs and then, in the inner loop, over
all TPCs (gr->tpc_count).
This is incorrect and leads to SMs being referenced
with invalid GPC/TPC ids.
Fix this by looping over the number of TPCs in each
GPC (gr_gk20a_get_tpc_count()) in the inner loop.
Also, fix gk20a_gr_wait_for_sm_lock_down() as
follows:
- we currently wait indefinitely for the SM to lock down
- restrict this wait with a timeout on silicon
platforms
- return -ETIMEDOUT instead of -EAGAIN
- add more debug prints with additional data
for SM lock down failures
Bug 200258704
Change-Id: Id6fe32e579647fd8ac287a4b2ec80cbf98791e0d
Signed-off-by: Cory Perry <cperry@nvidia.com>
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1316471
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 37 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_gr_gk20a.h | 14 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gm206/hw_gr_gm206.h | 14 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_gr_gm20b.h | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h | 12 |
6 files changed, 93 insertions, 8 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index afa665ab..4dec9e99 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -8588,17 +8588,23 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | |||
8588 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | 8588 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
8589 | u32 offset = | 8589 | u32 offset = |
8590 | gpc_stride * gpc + tpc_in_gpc_stride * tpc; | 8590 | gpc_stride * gpc + tpc_in_gpc_stride * tpc; |
8591 | u32 dbgr_status0 = 0, dbgr_control0 = 0; | ||
8592 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | ||
8593 | struct nvgpu_timeout timeout; | ||
8591 | 8594 | ||
8592 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 8595 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
8593 | "GPC%d TPC%d: locking down SM", gpc, tpc); | 8596 | "GPC%d TPC%d: locking down SM", gpc, tpc); |
8594 | 8597 | ||
8598 | nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), | ||
8599 | NVGPU_TIMER_CPU_TIMER); | ||
8600 | |||
8595 | /* wait for the sm to lock down */ | 8601 | /* wait for the sm to lock down */ |
8596 | do { | 8602 | do { |
8597 | u32 global_esr = gk20a_readl(g, | 8603 | u32 global_esr = gk20a_readl(g, |
8598 | gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); | 8604 | gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); |
8599 | u32 warp_esr = gk20a_readl(g, | 8605 | u32 warp_esr = gk20a_readl(g, |
8600 | gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); | 8606 | gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); |
8601 | u32 dbgr_status0 = gk20a_readl(g, | 8607 | dbgr_status0 = gk20a_readl(g, |
8602 | gr_gpc0_tpc0_sm_dbgr_status0_r() + offset); | 8608 | gr_gpc0_tpc0_sm_dbgr_status0_r() + offset); |
8603 | 8609 | ||
8604 | warp_esr = g->ops.gr.mask_hww_warp_esr(warp_esr); | 8610 | warp_esr = g->ops.gr.mask_hww_warp_esr(warp_esr); |
@@ -8630,13 +8636,32 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | |||
8630 | 8636 | ||
8631 | usleep_range(delay, delay * 2); | 8637 | usleep_range(delay, delay * 2); |
8632 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | 8638 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); |
8633 | } while (!locked_down); | 8639 | } while (!nvgpu_timeout_expired(&timeout) |
8640 | || !tegra_platform_is_silicon()); | ||
8641 | |||
8642 | dbgr_control0 = gk20a_readl(g, | ||
8643 | gr_gpc0_tpc0_sm_dbgr_control0_r() + offset); | ||
8634 | 8644 | ||
8645 | /* 64 bit read */ | ||
8646 | warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_1_r() + offset) << 32; | ||
8647 | warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + offset); | ||
8648 | |||
8649 | /* 64 bit read */ | ||
8650 | warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r() + offset) << 32; | ||
8651 | warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + offset); | ||
8652 | |||
8653 | /* 64 bit read */ | ||
8654 | warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r() + offset) << 32; | ||
8655 | warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + offset); | ||
8656 | |||
8657 | gk20a_err(dev_from_gk20a(g), | ||
8658 | "GPC%d TPC%d: timed out while trying to lock down SM", gpc, tpc); | ||
8635 | gk20a_err(dev_from_gk20a(g), | 8659 | gk20a_err(dev_from_gk20a(g), |
8636 | "GPC%d TPC%d: timed out while trying to lock down SM", | 8660 | "STATUS0(0x%x)=0x%x CONTROL0=0x%x VALID_MASK=0x%llx PAUSE_MASK=0x%llx TRAP_MASK=0x%llx\n", |
8637 | gpc, tpc); | 8661 | gr_gpc0_tpc0_sm_dbgr_status0_r() + offset, dbgr_status0, dbgr_control0, |
8662 | warps_valid, warps_paused, warps_trapped); | ||
8638 | 8663 | ||
8639 | return -EAGAIN; | 8664 | return -ETIMEDOUT; |
8640 | } | 8665 | } |
8641 | 8666 | ||
8642 | void gk20a_suspend_single_sm(struct gk20a *g, | 8667 | void gk20a_suspend_single_sm(struct gk20a *g, |
@@ -8699,7 +8724,7 @@ void gk20a_suspend_all_sms(struct gk20a *g, | |||
8699 | gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); | 8724 | gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); |
8700 | 8725 | ||
8701 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { | 8726 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { |
8702 | for (tpc = 0; tpc < gr->tpc_count; tpc++) { | 8727 | for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { |
8703 | err = | 8728 | err = |
8704 | gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, | 8729 | gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, |
8705 | global_esr_mask, check_errors); | 8730 | global_esr_mask, check_errors); |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_gr_gk20a.h index 1a888b53..7f6f58f3 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_gr_gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_gr_gk20a.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -3178,14 +3178,26 @@ static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) | |||
3178 | { | 3178 | { |
3179 | return 0x00504614; | 3179 | return 0x00504614; |
3180 | } | 3180 | } |
3181 | static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_1_r(void) | ||
3182 | { | ||
3183 | return 0x00504618; | ||
3184 | } | ||
3181 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) | 3185 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) |
3182 | { | 3186 | { |
3183 | return 0x00504624; | 3187 | return 0x00504624; |
3184 | } | 3188 | } |
3189 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r(void) | ||
3190 | { | ||
3191 | return 0x00504628; | ||
3192 | } | ||
3185 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) | 3193 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) |
3186 | { | 3194 | { |
3187 | return 0x00504634; | 3195 | return 0x00504634; |
3188 | } | 3196 | } |
3197 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r(void) | ||
3198 | { | ||
3199 | return 0x00504638; | ||
3200 | } | ||
3189 | static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void) | 3201 | static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void) |
3190 | { | 3202 | { |
3191 | return 0x00419e24; | 3203 | return 0x00419e24; |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gm206/hw_gr_gm206.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gm206/hw_gr_gm206.h index ff677844..f0dea40a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gm206/hw_gr_gm206.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gm206/hw_gr_gm206.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -3174,14 +3174,26 @@ static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) | |||
3174 | { | 3174 | { |
3175 | return 0x00504614; | 3175 | return 0x00504614; |
3176 | } | 3176 | } |
3177 | static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_1_r(void) | ||
3178 | { | ||
3179 | return 0x00504618; | ||
3180 | } | ||
3177 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) | 3181 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) |
3178 | { | 3182 | { |
3179 | return 0x00504624; | 3183 | return 0x00504624; |
3180 | } | 3184 | } |
3185 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r(void) | ||
3186 | { | ||
3187 | return 0x00504628; | ||
3188 | } | ||
3181 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) | 3189 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) |
3182 | { | 3190 | { |
3183 | return 0x00504634; | 3191 | return 0x00504634; |
3184 | } | 3192 | } |
3193 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r(void) | ||
3194 | { | ||
3195 | return 0x00504638; | ||
3196 | } | ||
3185 | static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void) | 3197 | static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void) |
3186 | { | 3198 | { |
3187 | return 0x00419e24; | 3199 | return 0x00419e24; |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_gr_gm20b.h index 30436fb1..bc966416 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_gr_gm20b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_gr_gm20b.h | |||
@@ -3250,6 +3250,10 @@ static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) | |||
3250 | { | 3250 | { |
3251 | return 0x00504614; | 3251 | return 0x00504614; |
3252 | } | 3252 | } |
3253 | static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_1_r(void) | ||
3254 | { | ||
3255 | return 0x00504618; | ||
3256 | } | ||
3253 | static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_2_r(void) | 3257 | static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_2_r(void) |
3254 | { | 3258 | { |
3255 | return 0x0050461c; | 3259 | return 0x0050461c; |
@@ -3258,6 +3262,10 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) | |||
3258 | { | 3262 | { |
3259 | return 0x00504624; | 3263 | return 0x00504624; |
3260 | } | 3264 | } |
3265 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r(void) | ||
3266 | { | ||
3267 | return 0x00504628; | ||
3268 | } | ||
3261 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r(void) | 3269 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r(void) |
3262 | { | 3270 | { |
3263 | return 0x00504750; | 3271 | return 0x00504750; |
@@ -3266,6 +3274,10 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) | |||
3266 | { | 3274 | { |
3267 | return 0x00504634; | 3275 | return 0x00504634; |
3268 | } | 3276 | } |
3277 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r(void) | ||
3278 | { | ||
3279 | return 0x00504638; | ||
3280 | } | ||
3269 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r(void) | 3281 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r(void) |
3270 | { | 3282 | { |
3271 | return 0x00504758; | 3283 | return 0x00504758; |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h index c20da067..c6490f7a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h | |||
@@ -3478,14 +3478,26 @@ static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) | |||
3478 | { | 3478 | { |
3479 | return 0x00504614; | 3479 | return 0x00504614; |
3480 | } | 3480 | } |
3481 | static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_1_r(void) | ||
3482 | { | ||
3483 | return 0x00504618; | ||
3484 | } | ||
3481 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) | 3485 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) |
3482 | { | 3486 | { |
3483 | return 0x00504624; | 3487 | return 0x00504624; |
3484 | } | 3488 | } |
3489 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r(void) | ||
3490 | { | ||
3491 | return 0x00504628; | ||
3492 | } | ||
3485 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) | 3493 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) |
3486 | { | 3494 | { |
3487 | return 0x00504634; | 3495 | return 0x00504634; |
3488 | } | 3496 | } |
3497 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r(void) | ||
3498 | { | ||
3499 | return 0x00504638; | ||
3500 | } | ||
3489 | static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void) | 3501 | static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void) |
3490 | { | 3502 | { |
3491 | return 0x00419e24; | 3503 | return 0x00419e24; |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h index 7989337c..12ba42a9 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h | |||
@@ -3606,14 +3606,26 @@ static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) | |||
3606 | { | 3606 | { |
3607 | return 0x00504614; | 3607 | return 0x00504614; |
3608 | } | 3608 | } |
3609 | static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_1_r(void) | ||
3610 | { | ||
3611 | return 0x00504618; | ||
3612 | } | ||
3609 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) | 3613 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) |
3610 | { | 3614 | { |
3611 | return 0x00504624; | 3615 | return 0x00504624; |
3612 | } | 3616 | } |
3617 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r(void) | ||
3618 | { | ||
3619 | return 0x00504628; | ||
3620 | } | ||
3613 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) | 3621 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) |
3614 | { | 3622 | { |
3615 | return 0x00504634; | 3623 | return 0x00504634; |
3616 | } | 3624 | } |
3625 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r(void) | ||
3626 | { | ||
3627 | return 0x00504638; | ||
3628 | } | ||
3617 | static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void) | 3629 | static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void) |
3618 | { | 3630 | { |
3619 | return 0x00419e24; | 3631 | return 0x00419e24; |