summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c109
-rw-r--r--drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h3
-rw-r--r--drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c195
-rw-r--r--drivers/gpu/nvgpu/common/ltc/ltc_gv11b.h2
4 files changed, 163 insertions, 146 deletions
diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c
index 9aabf543..4d11b44b 100644
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c
@@ -233,13 +233,68 @@ out:
233 return err; 233 return err;
234} 234}
235 235
236void gp10b_ltc_isr(struct gk20a *g) 236void gp10b_ltc_lts_isr(struct gk20a *g,
237 unsigned int ltc, unsigned int slice)
237{ 238{
238 u32 mc_intr, ltc_intr; 239 u32 offset;
239 unsigned int ltc, slice; 240 u32 ltc_intr;
240 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); 241 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
241 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); 242 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
242 243
244 offset = ltc_stride * ltc + lts_stride * slice;
245 ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + offset);
246
247 /* Detect and handle ECC errors */
248 if (ltc_intr &
249 ltc_ltcs_ltss_intr_ecc_sec_error_pending_f()) {
250 u32 ecc_stats_reg_val;
251
252 nvgpu_err(g,
253 "Single bit error detected in GPU L2!");
254
255 ecc_stats_reg_val =
256 gk20a_readl(g,
257 ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
258 g->ecc.ltc.ecc_sec_count[ltc][slice].counter +=
259 ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val);
260 ecc_stats_reg_val &=
261 ~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m());
262 nvgpu_writel_check(g,
263 ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
264 ecc_stats_reg_val);
265 g->ops.mm.l2_flush(g, true);
266 }
267 if (ltc_intr &
268 ltc_ltcs_ltss_intr_ecc_ded_error_pending_f()) {
269 u32 ecc_stats_reg_val;
270
271 nvgpu_err(g,
272 "Double bit error detected in GPU L2!");
273
274 ecc_stats_reg_val =
275 gk20a_readl(g,
276 ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
277 g->ecc.ltc.ecc_ded_count[ltc][slice].counter +=
278 ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val);
279 ecc_stats_reg_val &=
280 ~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m());
281 nvgpu_writel_check(g,
282 ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
283 ecc_stats_reg_val);
284 }
285
286 nvgpu_err(g, "ltc%d, slice %d: %08x",
287 ltc, slice, ltc_intr);
288 nvgpu_writel_check(g, ltc_ltc0_lts0_intr_r() +
289 ltc_stride * ltc + lts_stride * slice,
290 ltc_intr);
291}
292
293void gp10b_ltc_isr(struct gk20a *g)
294{
295 u32 mc_intr;
296 unsigned int ltc, slice;
297
243 mc_intr = gk20a_readl(g, mc_intr_ltc_r()); 298 mc_intr = gk20a_readl(g, mc_intr_ltc_r());
244 nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr); 299 nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr);
245 for (ltc = 0; ltc < g->ltc_count; ltc++) { 300 for (ltc = 0; ltc < g->ltc_count; ltc++) {
@@ -247,53 +302,7 @@ void gp10b_ltc_isr(struct gk20a *g)
247 continue; 302 continue;
248 } 303 }
249 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { 304 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
250 u32 offset = ltc_stride * ltc + lts_stride * slice; 305 gp10b_ltc_lts_isr(g, ltc, slice);
251 ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + offset);
252
253 /* Detect and handle ECC errors */
254 if (ltc_intr &
255 ltc_ltcs_ltss_intr_ecc_sec_error_pending_f()) {
256 u32 ecc_stats_reg_val;
257
258 nvgpu_err(g,
259 "Single bit error detected in GPU L2!");
260
261 ecc_stats_reg_val =
262 gk20a_readl(g,
263 ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
264 g->ecc.ltc.ecc_sec_count[ltc][slice].counter +=
265 ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val);
266 ecc_stats_reg_val &=
267 ~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m());
268 nvgpu_writel_check(g,
269 ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
270 ecc_stats_reg_val);
271 g->ops.mm.l2_flush(g, true);
272 }
273 if (ltc_intr &
274 ltc_ltcs_ltss_intr_ecc_ded_error_pending_f()) {
275 u32 ecc_stats_reg_val;
276
277 nvgpu_err(g,
278 "Double bit error detected in GPU L2!");
279
280 ecc_stats_reg_val =
281 gk20a_readl(g,
282 ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
283 g->ecc.ltc.ecc_ded_count[ltc][slice].counter +=
284 ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val);
285 ecc_stats_reg_val &=
286 ~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m());
287 nvgpu_writel_check(g,
288 ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
289 ecc_stats_reg_val);
290 }
291
292 nvgpu_err(g, "ltc%d, slice %d: %08x",
293 ltc, slice, ltc_intr);
294 nvgpu_writel_check(g, ltc_ltc0_lts0_intr_r() +
295 ltc_stride * ltc + lts_stride * slice,
296 ltc_intr);
297 } 306 }
298 } 307 }
299} 308}
diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h
index c1a2bf64..2be3f33e 100644
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h
@@ -22,6 +22,7 @@
22 22
23#ifndef LTC_GP10B_H 23#ifndef LTC_GP10B_H
24#define LTC_GP10B_H 24#define LTC_GP10B_H
25struct gk20a;
25struct gpu_ops; 26struct gpu_ops;
26 27
27void gp10b_ltc_isr(struct gk20a *g); 28void gp10b_ltc_isr(struct gk20a *g);
@@ -32,4 +33,6 @@ void gp10b_ltc_init_fs_state(struct gk20a *g);
32int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, 33int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
33 u32 min, u32 max); 34 u32 min, u32 max);
34void gp10b_ltc_set_enabled(struct gk20a *g, bool enabled); 35void gp10b_ltc_set_enabled(struct gk20a *g, bool enabled);
36void gp10b_ltc_lts_isr(struct gk20a *g,
37 unsigned int ltc, unsigned int slice);
35#endif 38#endif
diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c
index c5bf40c1..69c1ce02 100644
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c
@@ -106,16 +106,108 @@ void gv11b_ltc_intr_en_illegal_compstat(struct gk20a *g, bool enable)
106 gk20a_writel(g, ltc_ltcs_ltss_intr_r(), val); 106 gk20a_writel(g, ltc_ltcs_ltss_intr_r(), val);
107} 107}
108 108
109 109void gv11b_ltc_lts_isr(struct gk20a *g,
110void gv11b_ltc_isr(struct gk20a *g) 110 unsigned int ltc, unsigned int slice)
111{ 111{
112 u32 mc_intr, ltc_intr3; 112 u32 offset;
113 unsigned int ltc, slice; 113 u32 ltc_intr3;
114 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
115 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
116 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; 114 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
117 u32 corrected_delta, uncorrected_delta; 115 u32 corrected_delta, uncorrected_delta;
118 u32 corrected_overflow, uncorrected_overflow; 116 u32 corrected_overflow, uncorrected_overflow;
117 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
118 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
119
120 offset = ltc_stride * ltc + lts_stride * slice;
121 ltc_intr3 = gk20a_readl(g, ltc_ltc0_lts0_intr3_r() +
122 offset);
123
124 /* Detect and handle ECC PARITY errors */
125 if (ltc_intr3 &
126 (ltc_ltcs_ltss_intr3_ecc_uncorrected_m() |
127 ltc_ltcs_ltss_intr3_ecc_corrected_m())) {
128
129 ecc_status = gk20a_readl(g,
130 ltc_ltc0_lts0_l2_cache_ecc_status_r() +
131 offset);
132 ecc_addr = gk20a_readl(g,
133 ltc_ltc0_lts0_l2_cache_ecc_address_r() +
134 offset);
135 corrected_cnt = gk20a_readl(g,
136 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset);
137 uncorrected_cnt = gk20a_readl(g,
138 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset);
139
140 corrected_delta =
141 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(corrected_cnt);
142 uncorrected_delta =
143 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(uncorrected_cnt);
144 corrected_overflow = ecc_status &
145 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m();
146
147 uncorrected_overflow = ecc_status &
148 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m();
149
150 /* clear the interrupt */
151 if ((corrected_delta > 0U) || corrected_overflow) {
152 nvgpu_writel_check(g,
153 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset, 0);
154 }
155 if ((uncorrected_delta > 0U) || uncorrected_overflow) {
156 nvgpu_writel_check(g,
157 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset, 0);
158 }
159
160 nvgpu_writel_check(g,
161 ltc_ltc0_lts0_l2_cache_ecc_status_r() + offset,
162 ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f());
163
164 /* update counters per slice */
165 if (corrected_overflow) {
166 corrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s());
167 }
168 if (uncorrected_overflow) {
169 uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
170 }
171
172 g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta;
173 g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta;
174 nvgpu_log(g, gpu_dbg_intr,
175 "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
176
177 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) {
178 nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected");
179 }
180 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m()) {
181 nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected");
182 }
183 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) {
184 nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected");
185 }
186 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m()) {
187 nvgpu_log(g, gpu_dbg_intr, "tstg ecc error uncorrected");
188 }
189 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m()) {
190 nvgpu_log(g, gpu_dbg_intr, "dstg ecc error corrected");
191 }
192 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m()) {
193 nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected");
194 }
195
196 if (corrected_overflow || uncorrected_overflow) {
197 nvgpu_info(g, "ecc counter overflow!");
198 }
199
200 nvgpu_log(g, gpu_dbg_intr,
201 "ecc error address: 0x%x", ecc_addr);
202 }
203
204 gp10b_ltc_lts_isr(g, ltc, slice);
205}
206
207void gv11b_ltc_isr(struct gk20a *g)
208{
209 u32 mc_intr;
210 unsigned int ltc, slice;
119 211
120 mc_intr = gk20a_readl(g, mc_intr_ltc_r()); 212 mc_intr = gk20a_readl(g, mc_intr_ltc_r());
121 for (ltc = 0; ltc < g->ltc_count; ltc++) { 213 for (ltc = 0; ltc < g->ltc_count; ltc++) {
@@ -124,96 +216,7 @@ void gv11b_ltc_isr(struct gk20a *g)
124 } 216 }
125 217
126 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { 218 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
127 u32 offset = ltc_stride * ltc + lts_stride * slice; 219 gv11b_ltc_lts_isr(g, ltc, slice);
128 ltc_intr3 = gk20a_readl(g, ltc_ltc0_lts0_intr3_r() +
129 offset);
130
131 /* Detect and handle ECC PARITY errors */
132
133 if (ltc_intr3 &
134 (ltc_ltcs_ltss_intr3_ecc_uncorrected_m() |
135 ltc_ltcs_ltss_intr3_ecc_corrected_m())) {
136
137 ecc_status = gk20a_readl(g,
138 ltc_ltc0_lts0_l2_cache_ecc_status_r() +
139 offset);
140 ecc_addr = gk20a_readl(g,
141 ltc_ltc0_lts0_l2_cache_ecc_address_r() +
142 offset);
143 corrected_cnt = gk20a_readl(g,
144 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset);
145 uncorrected_cnt = gk20a_readl(g,
146 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset);
147
148 corrected_delta =
149 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(corrected_cnt);
150 uncorrected_delta =
151 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(uncorrected_cnt);
152 corrected_overflow = ecc_status &
153 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m();
154
155 uncorrected_overflow = ecc_status &
156 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m();
157
158 /* clear the interrupt */
159 if ((corrected_delta > 0U) || corrected_overflow) {
160 nvgpu_writel_check(g,
161 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset, 0);
162 }
163 if ((uncorrected_delta > 0U) || uncorrected_overflow) {
164 nvgpu_writel_check(g,
165 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset, 0);
166 }
167
168 nvgpu_writel_check(g,
169 ltc_ltc0_lts0_l2_cache_ecc_status_r() + offset,
170 ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f());
171
172 /* update counters per slice */
173 if (corrected_overflow) {
174 corrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s());
175 }
176 if (uncorrected_overflow) {
177 uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
178 }
179
180 g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta;
181 g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta;
182 nvgpu_log(g, gpu_dbg_intr,
183 "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
184
185 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) {
186 nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected");
187 }
188 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m()) {
189 nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected");
190 }
191 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) {
192 nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected");
193 }
194 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m()) {
195 nvgpu_log(g, gpu_dbg_intr, "tstg ecc error uncorrected");
196 }
197 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m()) {
198 nvgpu_log(g, gpu_dbg_intr, "dstg ecc error corrected");
199 }
200 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m()) {
201 nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected");
202 }
203
204 if (corrected_overflow || uncorrected_overflow) {
205 nvgpu_info(g, "ecc counter overflow!");
206 }
207
208 nvgpu_log(g, gpu_dbg_intr,
209 "ecc error address: 0x%x", ecc_addr);
210
211 }
212
213 } 220 }
214
215 } 221 }
216
217 /* fallback to other interrupts */
218 gp10b_ltc_isr(g);
219} 222}
diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.h b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.h
index 9d33b9fb..bad68661 100644
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.h
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.h
@@ -30,5 +30,7 @@ void gv11b_ltc_set_zbc_stencil_entry(struct gk20a *g,
30void gv11b_ltc_init_fs_state(struct gk20a *g); 30void gv11b_ltc_init_fs_state(struct gk20a *g);
31void gv11b_ltc_intr_en_illegal_compstat(struct gk20a *g, bool enable); 31void gv11b_ltc_intr_en_illegal_compstat(struct gk20a *g, bool enable);
32void gv11b_ltc_isr(struct gk20a *g); 32void gv11b_ltc_isr(struct gk20a *g);
33void gv11b_ltc_lts_isr(struct gk20a *g,
34 unsigned int ltc, unsigned int slice);
33 35
34#endif 36#endif