summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/common/ltc
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2018-09-12 08:29:52 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-09-20 13:52:22 -0400
commitd29300017aa1cd8ef1d2daef4383224bf00aff37 (patch)
tree92d4e912d71e908cae8719a5784676ae069b2982 /drivers/gpu/nvgpu/common/ltc
parent46477494b2f5d566a0c133746af00a3da4ee6b90 (diff)
gpu: nvgpu: move lts intr servicing to separate functions
We right now service LTS interrupts as part of the LTC interrupt service routine. Separate out the LTS interrupt handling into separate functions, e.g. gp10b_ltc_lts_isr() for gp10b and gv11b_ltc_lts_isr() for gv11b. gv11b_ltc_lts_isr() now calls gp10b_ltc_lts_isr() to service legacy LTS interrupts instead of calling gp10b_ltc_isr() directly. Bug 2216662 Jira NVGPU-767 Change-Id: Ia8499feca83f67ac455cee311edf32390acb83b8 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1821430 Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/ltc')
-rw-r--r--drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c109
-rw-r--r--drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h3
-rw-r--r--drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c195
-rw-r--r--drivers/gpu/nvgpu/common/ltc/ltc_gv11b.h2
4 files changed, 163 insertions, 146 deletions
diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c
index 9aabf543..4d11b44b 100644
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c
@@ -233,13 +233,68 @@ out:
233 return err; 233 return err;
234} 234}
235 235
236void gp10b_ltc_isr(struct gk20a *g) 236void gp10b_ltc_lts_isr(struct gk20a *g,
237 unsigned int ltc, unsigned int slice)
237{ 238{
238 u32 mc_intr, ltc_intr; 239 u32 offset;
239 unsigned int ltc, slice; 240 u32 ltc_intr;
240 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); 241 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
241 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); 242 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
242 243
244 offset = ltc_stride * ltc + lts_stride * slice;
245 ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + offset);
246
247 /* Detect and handle ECC errors */
248 if (ltc_intr &
249 ltc_ltcs_ltss_intr_ecc_sec_error_pending_f()) {
250 u32 ecc_stats_reg_val;
251
252 nvgpu_err(g,
253 "Single bit error detected in GPU L2!");
254
255 ecc_stats_reg_val =
256 gk20a_readl(g,
257 ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
258 g->ecc.ltc.ecc_sec_count[ltc][slice].counter +=
259 ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val);
260 ecc_stats_reg_val &=
261 ~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m());
262 nvgpu_writel_check(g,
263 ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
264 ecc_stats_reg_val);
265 g->ops.mm.l2_flush(g, true);
266 }
267 if (ltc_intr &
268 ltc_ltcs_ltss_intr_ecc_ded_error_pending_f()) {
269 u32 ecc_stats_reg_val;
270
271 nvgpu_err(g,
272 "Double bit error detected in GPU L2!");
273
274 ecc_stats_reg_val =
275 gk20a_readl(g,
276 ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
277 g->ecc.ltc.ecc_ded_count[ltc][slice].counter +=
278 ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val);
279 ecc_stats_reg_val &=
280 ~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m());
281 nvgpu_writel_check(g,
282 ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
283 ecc_stats_reg_val);
284 }
285
286 nvgpu_err(g, "ltc%d, slice %d: %08x",
287 ltc, slice, ltc_intr);
288 nvgpu_writel_check(g, ltc_ltc0_lts0_intr_r() +
289 ltc_stride * ltc + lts_stride * slice,
290 ltc_intr);
291}
292
293void gp10b_ltc_isr(struct gk20a *g)
294{
295 u32 mc_intr;
296 unsigned int ltc, slice;
297
243 mc_intr = gk20a_readl(g, mc_intr_ltc_r()); 298 mc_intr = gk20a_readl(g, mc_intr_ltc_r());
244 nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr); 299 nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr);
245 for (ltc = 0; ltc < g->ltc_count; ltc++) { 300 for (ltc = 0; ltc < g->ltc_count; ltc++) {
@@ -247,53 +302,7 @@ void gp10b_ltc_isr(struct gk20a *g)
247 continue; 302 continue;
248 } 303 }
249 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { 304 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
250 u32 offset = ltc_stride * ltc + lts_stride * slice; 305 gp10b_ltc_lts_isr(g, ltc, slice);
251 ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + offset);
252
253 /* Detect and handle ECC errors */
254 if (ltc_intr &
255 ltc_ltcs_ltss_intr_ecc_sec_error_pending_f()) {
256 u32 ecc_stats_reg_val;
257
258 nvgpu_err(g,
259 "Single bit error detected in GPU L2!");
260
261 ecc_stats_reg_val =
262 gk20a_readl(g,
263 ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
264 g->ecc.ltc.ecc_sec_count[ltc][slice].counter +=
265 ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val);
266 ecc_stats_reg_val &=
267 ~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m());
268 nvgpu_writel_check(g,
269 ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
270 ecc_stats_reg_val);
271 g->ops.mm.l2_flush(g, true);
272 }
273 if (ltc_intr &
274 ltc_ltcs_ltss_intr_ecc_ded_error_pending_f()) {
275 u32 ecc_stats_reg_val;
276
277 nvgpu_err(g,
278 "Double bit error detected in GPU L2!");
279
280 ecc_stats_reg_val =
281 gk20a_readl(g,
282 ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
283 g->ecc.ltc.ecc_ded_count[ltc][slice].counter +=
284 ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val);
285 ecc_stats_reg_val &=
286 ~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m());
287 nvgpu_writel_check(g,
288 ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
289 ecc_stats_reg_val);
290 }
291
292 nvgpu_err(g, "ltc%d, slice %d: %08x",
293 ltc, slice, ltc_intr);
294 nvgpu_writel_check(g, ltc_ltc0_lts0_intr_r() +
295 ltc_stride * ltc + lts_stride * slice,
296 ltc_intr);
297 } 306 }
298 } 307 }
299} 308}
diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h
index c1a2bf64..2be3f33e 100644
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h
@@ -22,6 +22,7 @@
22 22
23#ifndef LTC_GP10B_H 23#ifndef LTC_GP10B_H
24#define LTC_GP10B_H 24#define LTC_GP10B_H
25struct gk20a;
25struct gpu_ops; 26struct gpu_ops;
26 27
27void gp10b_ltc_isr(struct gk20a *g); 28void gp10b_ltc_isr(struct gk20a *g);
@@ -32,4 +33,6 @@ void gp10b_ltc_init_fs_state(struct gk20a *g);
32int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, 33int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
33 u32 min, u32 max); 34 u32 min, u32 max);
34void gp10b_ltc_set_enabled(struct gk20a *g, bool enabled); 35void gp10b_ltc_set_enabled(struct gk20a *g, bool enabled);
36void gp10b_ltc_lts_isr(struct gk20a *g,
37 unsigned int ltc, unsigned int slice);
35#endif 38#endif
diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c
index c5bf40c1..69c1ce02 100644
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c
@@ -106,16 +106,108 @@ void gv11b_ltc_intr_en_illegal_compstat(struct gk20a *g, bool enable)
106 gk20a_writel(g, ltc_ltcs_ltss_intr_r(), val); 106 gk20a_writel(g, ltc_ltcs_ltss_intr_r(), val);
107} 107}
108 108
109 109void gv11b_ltc_lts_isr(struct gk20a *g,
110void gv11b_ltc_isr(struct gk20a *g) 110 unsigned int ltc, unsigned int slice)
111{ 111{
112 u32 mc_intr, ltc_intr3; 112 u32 offset;
113 unsigned int ltc, slice; 113 u32 ltc_intr3;
114 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
115 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
116 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; 114 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
117 u32 corrected_delta, uncorrected_delta; 115 u32 corrected_delta, uncorrected_delta;
118 u32 corrected_overflow, uncorrected_overflow; 116 u32 corrected_overflow, uncorrected_overflow;
117 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
118 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
119
120 offset = ltc_stride * ltc + lts_stride * slice;
121 ltc_intr3 = gk20a_readl(g, ltc_ltc0_lts0_intr3_r() +
122 offset);
123
124 /* Detect and handle ECC PARITY errors */
125 if (ltc_intr3 &
126 (ltc_ltcs_ltss_intr3_ecc_uncorrected_m() |
127 ltc_ltcs_ltss_intr3_ecc_corrected_m())) {
128
129 ecc_status = gk20a_readl(g,
130 ltc_ltc0_lts0_l2_cache_ecc_status_r() +
131 offset);
132 ecc_addr = gk20a_readl(g,
133 ltc_ltc0_lts0_l2_cache_ecc_address_r() +
134 offset);
135 corrected_cnt = gk20a_readl(g,
136 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset);
137 uncorrected_cnt = gk20a_readl(g,
138 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset);
139
140 corrected_delta =
141 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(corrected_cnt);
142 uncorrected_delta =
143 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(uncorrected_cnt);
144 corrected_overflow = ecc_status &
145 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m();
146
147 uncorrected_overflow = ecc_status &
148 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m();
149
150 /* clear the interrupt */
151 if ((corrected_delta > 0U) || corrected_overflow) {
152 nvgpu_writel_check(g,
153 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset, 0);
154 }
155 if ((uncorrected_delta > 0U) || uncorrected_overflow) {
156 nvgpu_writel_check(g,
157 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset, 0);
158 }
159
160 nvgpu_writel_check(g,
161 ltc_ltc0_lts0_l2_cache_ecc_status_r() + offset,
162 ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f());
163
164 /* update counters per slice */
165 if (corrected_overflow) {
166 corrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s());
167 }
168 if (uncorrected_overflow) {
169 uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
170 }
171
172 g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta;
173 g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta;
174 nvgpu_log(g, gpu_dbg_intr,
175 "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
176
177 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) {
178 nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected");
179 }
180 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m()) {
181 nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected");
182 }
183 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) {
184 nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected");
185 }
186 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m()) {
187 nvgpu_log(g, gpu_dbg_intr, "tstg ecc error uncorrected");
188 }
189 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m()) {
190 nvgpu_log(g, gpu_dbg_intr, "dstg ecc error corrected");
191 }
192 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m()) {
193 nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected");
194 }
195
196 if (corrected_overflow || uncorrected_overflow) {
197 nvgpu_info(g, "ecc counter overflow!");
198 }
199
200 nvgpu_log(g, gpu_dbg_intr,
201 "ecc error address: 0x%x", ecc_addr);
202 }
203
204 gp10b_ltc_lts_isr(g, ltc, slice);
205}
206
207void gv11b_ltc_isr(struct gk20a *g)
208{
209 u32 mc_intr;
210 unsigned int ltc, slice;
119 211
120 mc_intr = gk20a_readl(g, mc_intr_ltc_r()); 212 mc_intr = gk20a_readl(g, mc_intr_ltc_r());
121 for (ltc = 0; ltc < g->ltc_count; ltc++) { 213 for (ltc = 0; ltc < g->ltc_count; ltc++) {
@@ -124,96 +216,7 @@ void gv11b_ltc_isr(struct gk20a *g)
124 } 216 }
125 217
126 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { 218 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
127 u32 offset = ltc_stride * ltc + lts_stride * slice; 219 gv11b_ltc_lts_isr(g, ltc, slice);
128 ltc_intr3 = gk20a_readl(g, ltc_ltc0_lts0_intr3_r() +
129 offset);
130
131 /* Detect and handle ECC PARITY errors */
132
133 if (ltc_intr3 &
134 (ltc_ltcs_ltss_intr3_ecc_uncorrected_m() |
135 ltc_ltcs_ltss_intr3_ecc_corrected_m())) {
136
137 ecc_status = gk20a_readl(g,
138 ltc_ltc0_lts0_l2_cache_ecc_status_r() +
139 offset);
140 ecc_addr = gk20a_readl(g,
141 ltc_ltc0_lts0_l2_cache_ecc_address_r() +
142 offset);
143 corrected_cnt = gk20a_readl(g,
144 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset);
145 uncorrected_cnt = gk20a_readl(g,
146 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset);
147
148 corrected_delta =
149 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(corrected_cnt);
150 uncorrected_delta =
151 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(uncorrected_cnt);
152 corrected_overflow = ecc_status &
153 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m();
154
155 uncorrected_overflow = ecc_status &
156 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m();
157
158 /* clear the interrupt */
159 if ((corrected_delta > 0U) || corrected_overflow) {
160 nvgpu_writel_check(g,
161 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset, 0);
162 }
163 if ((uncorrected_delta > 0U) || uncorrected_overflow) {
164 nvgpu_writel_check(g,
165 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset, 0);
166 }
167
168 nvgpu_writel_check(g,
169 ltc_ltc0_lts0_l2_cache_ecc_status_r() + offset,
170 ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f());
171
172 /* update counters per slice */
173 if (corrected_overflow) {
174 corrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s());
175 }
176 if (uncorrected_overflow) {
177 uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
178 }
179
180 g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta;
181 g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta;
182 nvgpu_log(g, gpu_dbg_intr,
183 "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
184
185 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) {
186 nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected");
187 }
188 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m()) {
189 nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected");
190 }
191 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) {
192 nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected");
193 }
194 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m()) {
195 nvgpu_log(g, gpu_dbg_intr, "tstg ecc error uncorrected");
196 }
197 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m()) {
198 nvgpu_log(g, gpu_dbg_intr, "dstg ecc error corrected");
199 }
200 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m()) {
201 nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected");
202 }
203
204 if (corrected_overflow || uncorrected_overflow) {
205 nvgpu_info(g, "ecc counter overflow!");
206 }
207
208 nvgpu_log(g, gpu_dbg_intr,
209 "ecc error address: 0x%x", ecc_addr);
210
211 }
212
213 } 220 }
214
215 } 221 }
216
217 /* fallback to other interrupts */
218 gp10b_ltc_isr(g);
219} 222}
diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.h b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.h
index 9d33b9fb..bad68661 100644
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.h
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.h
@@ -30,5 +30,7 @@ void gv11b_ltc_set_zbc_stencil_entry(struct gk20a *g,
30void gv11b_ltc_init_fs_state(struct gk20a *g); 30void gv11b_ltc_init_fs_state(struct gk20a *g);
31void gv11b_ltc_intr_en_illegal_compstat(struct gk20a *g, bool enable); 31void gv11b_ltc_intr_en_illegal_compstat(struct gk20a *g, bool enable);
32void gv11b_ltc_isr(struct gk20a *g); 32void gv11b_ltc_isr(struct gk20a *g);
33void gv11b_ltc_lts_isr(struct gk20a *g,
34 unsigned int ltc, unsigned int slice);
33 35
34#endif 36#endif