From de568db9dee599fa27cdc2ead88186099fff3c3b Mon Sep 17 00:00:00 2001 From: Cory Perry Date: Tue, 7 Mar 2017 09:32:53 -0800 Subject: gpu: nvgpu: fix suspending all SMs In gk20a_suspend_all_sms(), we currently loop over all GPCs and then loop over all TPCs in the inner loop. But this is incorrect and leads to SMs with invalid GPC, TPC ids. Fix this by looping over the number of TPCs in each GPC in the inner loop. Also, fix gk20a_gr_wait_for_sm_lock_down() as per below: - we right now wait infinitely for SM to lock down; - restrict this wait with a timeout on silicon platforms; - return ETIMEDOUT instead of EAGAIN; - add more debug prints with additional data for SM lock down failures. Bug 200258704 Change-Id: Id6fe32e579647fd8ac287a4b2ec80cbf98791e0d Signed-off-by: Cory Perry Signed-off-by: Deepak Nibade Reviewed-on: http://git-master/r/1316471 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 37 ++++++++++++++++++---- .../gpu/nvgpu/include/nvgpu/hw/gk20a/hw_gr_gk20a.h | 14 +++++++- .../gpu/nvgpu/include/nvgpu/hw/gm206/hw_gr_gm206.h | 14 +++++++- .../gpu/nvgpu/include/nvgpu/hw/gm20b/hw_gr_gm20b.h | 12 +++++++ .../gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h | 12 +++++++ .../gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h | 12 +++++++ 6 files changed, 93 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index afa665ab..4dec9e99 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -8588,17 +8588,23 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; + u32 dbgr_status0 = 0, dbgr_control0 = 0; + u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; + struct nvgpu_timeout timeout; gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "GPC%d TPC%d: locking down SM", gpc, tpc); + 
nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), + NVGPU_TIMER_CPU_TIMER); + /* wait for the sm to lock down */ do { u32 global_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); u32 warp_esr = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); - u32 dbgr_status0 = gk20a_readl(g, + dbgr_status0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_status0_r() + offset); warp_esr = g->ops.gr.mask_hww_warp_esr(warp_esr); @@ -8630,13 +8636,32 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, usleep_range(delay, delay * 2); delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); - } while (!locked_down); + } while (!nvgpu_timeout_expired(&timeout) + || !tegra_platform_is_silicon()); + + dbgr_control0 = gk20a_readl(g, + gr_gpc0_tpc0_sm_dbgr_control0_r() + offset); + /* 64 bit read */ + warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_1_r() + offset) << 32; + warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + offset); + + /* 64 bit read */ + warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r() + offset) << 32; + warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + offset); + + /* 64 bit read */ + warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r() + offset) << 32; + warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + offset); + + gk20a_err(dev_from_gk20a(g), + "GPC%d TPC%d: timed out while trying to lock down SM", gpc, tpc); gk20a_err(dev_from_gk20a(g), - "GPC%d TPC%d: timed out while trying to lock down SM", - gpc, tpc); + "STATUS0(0x%x)=0x%x CONTROL0=0x%x VALID_MASK=0x%llx PAUSE_MASK=0x%llx TRAP_MASK=0x%llx\n", + gr_gpc0_tpc0_sm_dbgr_status0_r() + offset, dbgr_status0, dbgr_control0, + warps_valid, warps_paused, warps_trapped); - return -EAGAIN; + return -ETIMEDOUT; } void gk20a_suspend_single_sm(struct gk20a *g, @@ -8699,7 +8724,7 @@ void gk20a_suspend_all_sms(struct gk20a *g, 
gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); for (gpc = 0; gpc < gr->gpc_count; gpc++) { - for (tpc = 0; tpc < gr->tpc_count; tpc++) { + for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { err = gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, global_esr_mask, check_errors); diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_gr_gk20a.h index 1a888b53..7f6f58f3 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_gr_gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gk20a/hw_gr_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -3178,14 +3178,26 @@ static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) { return 0x00504614; } +static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_1_r(void) +{ + return 0x00504618; +} static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) { return 0x00504624; } +static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r(void) +{ + return 0x00504628; +} static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) { return 0x00504634; } +static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r(void) +{ + return 0x00504638; +} static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void) { return 0x00419e24; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gm206/hw_gr_gm206.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gm206/hw_gr_gm206.h index ff677844..f0dea40a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gm206/hw_gr_gm206.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gm206/hw_gr_gm206.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -3174,14 +3174,26 @@ static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) { return 0x00504614; } +static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_1_r(void) +{ + return 0x00504618; +} static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) { return 0x00504624; } +static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r(void) +{ + return 0x00504628; +} static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) { return 0x00504634; } +static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r(void) +{ + return 0x00504638; +} static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void) { return 0x00419e24; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_gr_gm20b.h index 30436fb1..bc966416 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_gr_gm20b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_gr_gm20b.h @@ -3250,6 +3250,10 @@ static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) { return 0x00504614; } +static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_1_r(void) +{ + return 0x00504618; +} static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_2_r(void) { return 0x0050461c; @@ -3258,6 +3262,10 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) { return 0x00504624; } +static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r(void) +{ + return 0x00504628; +} static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r(void) { return 0x00504750; @@ -3266,6 +3274,10 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) { return 0x00504634; } +static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r(void) +{ + return 0x00504638; +} static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r(void) { return 0x00504758; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h 
b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h index c20da067..c6490f7a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_gr_gp106.h @@ -3478,14 +3478,26 @@ static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) { return 0x00504614; } +static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_1_r(void) +{ + return 0x00504618; +} static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) { return 0x00504624; } +static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r(void) +{ + return 0x00504628; +} static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) { return 0x00504634; } +static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r(void) +{ + return 0x00504638; +} static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void) { return 0x00419e24; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h index 7989337c..12ba42a9 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h @@ -3606,14 +3606,26 @@ static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) { return 0x00504614; } +static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_1_r(void) +{ + return 0x00504618; +} static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) { return 0x00504624; } +static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_1_r(void) +{ + return 0x00504628; +} static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) { return 0x00504634; } +static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_1_r(void) +{ + return 0x00504638; +} static inline u32 gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(void) { return 0x00419e24; -- cgit v1.2.2