diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 158 |
1 file changed, 155 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index a726d058..63107cfc 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -2781,7 +2781,7 @@ static void gv11b_gr_suspend_single_sm(struct gk20a *g, | |||
2781 | gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, | 2781 | gk20a_writel(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, |
2782 | dbgr_control0); | 2782 | dbgr_control0); |
2783 | 2783 | ||
2784 | err = gk20a_gr_wait_for_sm_lock_down(g, gpc, tpc, | 2784 | err = g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, |
2785 | global_esr_mask, check_errors); | 2785 | global_esr_mask, check_errors); |
2786 | if (err) { | 2786 | if (err) { |
2787 | nvgpu_err(g, | 2787 | nvgpu_err(g, |
@@ -2822,8 +2822,8 @@ static void gv11b_gr_suspend_all_sms(struct gk20a *g, | |||
2822 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { | 2822 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { |
2823 | for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { | 2823 | for (tpc = 0; tpc < gr_gk20a_get_tpc_count(gr, gpc); tpc++) { |
2824 | for (sm = 0; sm < sm_per_tpc; sm++) { | 2824 | for (sm = 0; sm < sm_per_tpc; sm++) { |
2825 | err = gk20a_gr_wait_for_sm_lock_down(g, | 2825 | err = g->ops.gr.wait_for_sm_lock_down(g, |
2826 | gpc, tpc, | 2826 | gpc, tpc, sm, |
2827 | global_esr_mask, check_errors); | 2827 | global_esr_mask, check_errors); |
2828 | if (err) { | 2828 | if (err) { |
2829 | nvgpu_err(g, | 2829 | nvgpu_err(g, |
@@ -3026,6 +3026,156 @@ static u32 gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask(struct gk20a *g) | |||
3026 | return global_esr_mask; | 3026 | return global_esr_mask; |
3027 | } | 3027 | } |
3028 | 3028 | ||
3029 | static void gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(struct gk20a *g, | ||
3030 | u32 offset, bool timeout) | ||
3031 | { | ||
3032 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | ||
3033 | u32 dbgr_control0 = gk20a_readl(g, | ||
3034 | gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); | ||
3035 | u32 dbgr_status0 = gk20a_readl(g, | ||
3036 | gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset); | ||
3037 | /* 64 bit read */ | ||
3038 | warps_valid = | ||
3039 | (u64)gk20a_readl(g, gr_gpc0_tpc0_sm0_warp_valid_mask_1_r() + | ||
3040 | offset) << 32; | ||
3041 | warps_valid |= gk20a_readl(g, | ||
3042 | gr_gpc0_tpc0_sm0_warp_valid_mask_0_r() + offset); | ||
3043 | |||
3044 | /* 64 bit read */ | ||
3045 | warps_paused = | ||
3046 | (u64)gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_1_r() + | ||
3047 | offset) << 32; | ||
3048 | warps_paused |= gk20a_readl(g, | ||
3049 | gr_gpc0_tpc0_sm0_dbgr_bpt_pause_mask_0_r() + offset); | ||
3050 | |||
3051 | /* 64 bit read */ | ||
3052 | warps_trapped = | ||
3053 | (u64)gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_1_r() + | ||
3054 | offset) << 32; | ||
3055 | warps_trapped |= gk20a_readl(g, | ||
3056 | gr_gpc0_tpc0_sm0_dbgr_bpt_trap_mask_0_r() + offset); | ||
3057 | if (timeout) | ||
3058 | nvgpu_err(g, | ||
3059 | "STATUS0=0x%x CONTROL0=0x%x VALID_MASK=0x%llx " | ||
3060 | "PAUSE_MASK=0x%llx TRAP_MASK=0x%llx\n", | ||
3061 | dbgr_status0, dbgr_control0, warps_valid, | ||
3062 | warps_paused, warps_trapped); | ||
3063 | else | ||
3064 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | ||
3065 | "STATUS0=0x%x CONTROL0=0x%x VALID_MASK=0x%llx " | ||
3066 | "PAUSE_MASK=0x%llx TRAP_MASK=0x%llx\n", | ||
3067 | dbgr_status0, dbgr_control0, warps_valid, | ||
3068 | warps_paused, warps_trapped); | ||
3069 | } | ||
3070 | |||
3071 | static int gv11b_gr_wait_for_sm_lock_down(struct gk20a *g, | ||
3072 | u32 gpc, u32 tpc, u32 sm, | ||
3073 | u32 global_esr_mask, bool check_errors) | ||
3074 | { | ||
3075 | bool locked_down; | ||
3076 | bool no_error_pending; | ||
3077 | u32 delay = GR_IDLE_CHECK_DEFAULT; | ||
3078 | bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g); | ||
3079 | u32 dbgr_status0 = 0; | ||
3080 | u32 warp_esr, global_esr; | ||
3081 | struct nvgpu_timeout timeout; | ||
3082 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + | ||
3083 | gk20a_gr_tpc_offset(g, tpc); | ||
3084 | gv11b_gr_sm_offset(g, sm); | ||
3085 | |||
3086 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | ||
3087 | "GPC%d TPC%d: locking down SM%d", gpc, tpc, sm); | ||
3088 | |||
3089 | nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), | ||
3090 | NVGPU_TIMER_CPU_TIMER); | ||
3091 | |||
3092 | /* wait for the sm to lock down */ | ||
3093 | do { | ||
3094 | global_esr = g->ops.gr.get_sm_hww_global_esr(g, gpc, tpc, sm); | ||
3095 | dbgr_status0 = gk20a_readl(g, | ||
3096 | gr_gpc0_tpc0_sm0_dbgr_status0_r() + offset); | ||
3097 | |||
3098 | warp_esr = g->ops.gr.get_sm_hww_warp_esr(g, gpc, tpc, sm); | ||
3099 | |||
3100 | locked_down = | ||
3101 | (gr_gpc0_tpc0_sm0_dbgr_status0_locked_down_v(dbgr_status0) == | ||
3102 | gr_gpc0_tpc0_sm0_dbgr_status0_locked_down_true_v()); | ||
3103 | no_error_pending = | ||
3104 | check_errors && | ||
3105 | (gr_gpc0_tpc0_sm0_hww_warp_esr_error_v(warp_esr) == | ||
3106 | gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_v()) && | ||
3107 | ((global_esr & ~global_esr_mask) == 0); | ||
3108 | |||
3109 | if (locked_down) { | ||
3110 | /* | ||
3111 | * if SM reports locked down, it means that SM is idle and | ||
3112 | * trapped and also that one of the these conditions are true | ||
3113 | * 1) sm is nonempty and all valid warps are paused | ||
3114 | * 2) sm is empty and held in trapped state due to stop trigger | ||
3115 | * 3) sm is nonempty and some warps are not paused, but are | ||
3116 | * instead held at RTT due to an "active" stop trigger | ||
3117 | * Check for Paused warp mask != Valid | ||
3118 | * warp mask after SM reports it is locked down in order to | ||
3119 | * distinguish case 1 from case 3. When case 3 is detected, | ||
3120 | * it implies a misprogrammed trap handler code, as all warps | ||
3121 | * in the handler must promise to BPT.PAUSE instead of RTT | ||
3122 | * whenever SR64 read in trap mode indicates stop trigger | ||
3123 | * is asserted. | ||
3124 | */ | ||
3125 | gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(g, | ||
3126 | offset, false); | ||
3127 | } | ||
3128 | |||
3129 | if (locked_down || no_error_pending) { | ||
3130 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | ||
3131 | "GPC%d TPC%d: locked down SM%d", gpc, tpc, sm); | ||
3132 | return 0; | ||
3133 | } | ||
3134 | |||
3135 | /* if an mmu fault is pending and mmu debug mode is not | ||
3136 | * enabled, the sm will never lock down. | ||
3137 | */ | ||
3138 | if (!mmu_debug_mode_enabled && | ||
3139 | (g->ops.mm.mmu_fault_pending(g))) { | ||
3140 | nvgpu_err(g, | ||
3141 | "GPC%d TPC%d: mmu fault pending," | ||
3142 | " SM%d will never lock down!", gpc, tpc, sm); | ||
3143 | return -EFAULT; | ||
3144 | } | ||
3145 | |||
3146 | nvgpu_usleep_range(delay, delay * 2); | ||
3147 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | ||
3148 | } while (!nvgpu_timeout_expired(&timeout)); | ||
3149 | |||
3150 | nvgpu_err(g, "GPC%d TPC%d: timed out while trying to " | ||
3151 | "lock down SM%d", gpc, tpc, sm); | ||
3152 | gv11b_gr_sm_dump_warp_bpt_pause_trap_mask_regs(g, offset, true); | ||
3153 | |||
3154 | return -ETIMEDOUT; | ||
3155 | } | ||
3156 | |||
3157 | static int gv11b_gr_lock_down_sm(struct gk20a *g, | ||
3158 | u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask, | ||
3159 | bool check_errors) | ||
3160 | { | ||
3161 | u32 dbgr_control0; | ||
3162 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + gk20a_gr_tpc_offset(g, tpc) + | ||
3163 | gv11b_gr_sm_offset(g, sm); | ||
3164 | |||
3165 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | ||
3166 | "GPC%d TPC%d SM%d: assert stop trigger", gpc, tpc, sm); | ||
3167 | |||
3168 | /* assert stop trigger */ | ||
3169 | dbgr_control0 = | ||
3170 | gk20a_readl(g, gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset); | ||
3171 | dbgr_control0 |= gr_gpc0_tpc0_sm0_dbgr_control0_stop_trigger_enable_f(); | ||
3172 | gk20a_writel(g, | ||
3173 | gr_gpc0_tpc0_sm0_dbgr_control0_r() + offset, dbgr_control0); | ||
3174 | |||
3175 | return g->ops.gr.wait_for_sm_lock_down(g, gpc, tpc, sm, global_esr_mask, | ||
3176 | check_errors); | ||
3177 | } | ||
3178 | |||
3029 | void gv11b_init_gr(struct gpu_ops *gops) | 3179 | void gv11b_init_gr(struct gpu_ops *gops) |
3030 | { | 3180 | { |
3031 | gp10b_init_gr(gops); | 3181 | gp10b_init_gr(gops); |
@@ -3103,4 +3253,6 @@ void gv11b_init_gr(struct gpu_ops *gops) | |||
3103 | gops->gr.get_sm_hww_global_esr = gv11b_gr_get_sm_hww_global_esr; | 3253 | gops->gr.get_sm_hww_global_esr = gv11b_gr_get_sm_hww_global_esr; |
3104 | gops->gr.get_sm_no_lock_down_hww_global_esr_mask = | 3254 | gops->gr.get_sm_no_lock_down_hww_global_esr_mask = |
3105 | gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask; | 3255 | gv11b_gr_get_sm_no_lock_down_hww_global_esr_mask; |
3256 | gops->gr.lock_down_sm = gv11b_gr_lock_down_sm; | ||
3257 | gops->gr.wait_for_sm_lock_down = gv11b_gr_wait_for_sm_lock_down; | ||
3106 | } | 3258 | } |