summary refs log tree commit diff stats
path: root/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r-- drivers/gpu/nvgpu/gv11b/gr_gv11b.c 87
1 files changed, 86 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 179c7d33..ad34233c 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -108,6 +108,89 @@ static bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
108 return valid; 108 return valid;
109} 109}
110 110
111static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
112 bool *post_event, struct channel_gk20a *fault_ch,
113 u32 *hww_global_esr)
114{
115 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
116 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
117 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
118 u32 l1_tag_ecc_status, l1_tag_ecc_corrected_err_status = 0;
119 u32 l1_tag_ecc_uncorrected_err_status = 0;
120 u32 l1_tag_corrected_err_count_delta = 0;
121 u32 l1_tag_uncorrected_err_count_delta = 0;
122 bool is_l1_tag_ecc_corrected_total_err_overflow = 0;
123 bool is_l1_tag_ecc_uncorrected_total_err_overflow = 0;
124
125 /* Check for L1 tag ECC errors. */
126 l1_tag_ecc_status = gk20a_readl(g,
127 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_r() + offset);
128 l1_tag_ecc_corrected_err_status = l1_tag_ecc_status &
129 (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_0_m() |
130 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_1_m() |
131 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_pixrpf_m() |
132 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_miss_fifo_m());
133 l1_tag_ecc_uncorrected_err_status = l1_tag_ecc_status &
134 (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_0_m() |
135 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_1_m() |
136 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_pixrpf_m() |
137 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_miss_fifo_m());
138
139 if ((l1_tag_ecc_corrected_err_status == 0) && (l1_tag_ecc_uncorrected_err_status == 0))
140 return 0;
141
142 l1_tag_corrected_err_count_delta =
143 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_v(
144 gk20a_readl(g,
145 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() +
146 offset));
147 l1_tag_uncorrected_err_count_delta =
148 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_v(
149 gk20a_readl(g,
150 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() +
151 offset));
152 is_l1_tag_ecc_corrected_total_err_overflow =
153 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_total_counter_overflow_v(l1_tag_ecc_status);
154 is_l1_tag_ecc_uncorrected_total_err_overflow =
155 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_total_counter_overflow_v(l1_tag_ecc_status);
156
157 if ((l1_tag_corrected_err_count_delta > 0) || is_l1_tag_ecc_corrected_total_err_overflow) {
158 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
159 "corrected error (SBE) detected in SM L1 tag! err_mask [%08x] is_overf [%d]",
160 l1_tag_ecc_corrected_err_status, is_l1_tag_ecc_corrected_total_err_overflow);
161
162 /* HW uses 16-bits counter */
163 l1_tag_corrected_err_count_delta +=
164 (is_l1_tag_ecc_corrected_total_err_overflow <<
165 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s());
166 g->gr.t19x.ecc_stats.sm_l1_tag_corrected_err_count.counters[tpc] +=
167 l1_tag_corrected_err_count_delta;
168 gk20a_writel(g,
169 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset,
170 0);
171 }
172 if ((l1_tag_uncorrected_err_count_delta > 0) || is_l1_tag_ecc_uncorrected_total_err_overflow) {
173 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
174 "Uncorrected error (DBE) detected in SM L1 tag! err_mask [%08x] is_overf [%d]",
175 l1_tag_ecc_uncorrected_err_status, is_l1_tag_ecc_uncorrected_total_err_overflow);
176
177 /* HW uses 16-bits counter */
178 l1_tag_uncorrected_err_count_delta +=
179 (is_l1_tag_ecc_uncorrected_total_err_overflow <<
180 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s());
181 g->gr.t19x.ecc_stats.sm_l1_tag_uncorrected_err_count.counters[tpc] +=
182 l1_tag_uncorrected_err_count_delta;
183 gk20a_writel(g,
184 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset,
185 0);
186 }
187
188 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_r() + offset,
189 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_reset_task_f());
190
191 return 0;
192
193}
111 194
112static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, 195static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
113 bool *post_event, struct channel_gk20a *fault_ch, 196 bool *post_event, struct channel_gk20a *fault_ch,
@@ -118,7 +201,8 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
118 proj_tpc_in_gpc_stride_v() * tpc; 201 proj_tpc_in_gpc_stride_v() * tpc;
119 u32 lrf_ecc_status; 202 u32 lrf_ecc_status;
120 203
121 gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr); 204 /* Check for L1 tag ECC errors. */
205 gr_gv11b_handle_l1_tag_exception(g, gpc, tpc, post_event, fault_ch, hww_global_esr);
122 206
123 /* Check for LRF ECC errors. */ 207 /* Check for LRF ECC errors. */
124 lrf_ecc_status = gk20a_readl(g, 208 lrf_ecc_status = gk20a_readl(g,
@@ -1692,6 +1776,7 @@ void gv11b_init_gr(struct gpu_ops *gops)
1692 gops->gr.pre_process_sm_exception = 1776 gops->gr.pre_process_sm_exception =
1693 gr_gv11b_pre_process_sm_exception; 1777 gr_gv11b_pre_process_sm_exception;
1694 gops->gr.handle_fecs_error = gr_gv11b_handle_fecs_error; 1778 gops->gr.handle_fecs_error = gr_gv11b_handle_fecs_error;
1779 gops->gr.create_gr_sysfs = gr_gv11b_create_sysfs;
1695 gops->gr.setup_rop_mapping = gr_gv11b_setup_rop_mapping; 1780 gops->gr.setup_rop_mapping = gr_gv11b_setup_rop_mapping;
1696 gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle; 1781 gops->gr.init_sw_veid_bundle = gr_gv11b_init_sw_veid_bundle;
1697 gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping; 1782 gops->gr.program_zcull_mapping = gr_gv11b_program_zcull_mapping;