Diffstat (limited to 'drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c')
-rw-r--r--  drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c  195
1 file changed, 99 insertions(+), 96 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c
index c5bf40c1..69c1ce02 100644
--- a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c
+++ b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c
@@ -106,16 +106,108 @@ void gv11b_ltc_intr_en_illegal_compstat(struct gk20a *g, bool enable)
 	gk20a_writel(g, ltc_ltcs_ltss_intr_r(), val);
 }
 
-
-void gv11b_ltc_isr(struct gk20a *g)
+void gv11b_ltc_lts_isr(struct gk20a *g,
+			unsigned int ltc, unsigned int slice)
 {
-	u32 mc_intr, ltc_intr3;
-	unsigned int ltc, slice;
-	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
-	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
+	u32 offset;
+	u32 ltc_intr3;
 	u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
 	u32 corrected_delta, uncorrected_delta;
 	u32 corrected_overflow, uncorrected_overflow;
+	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
+	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
+
+	offset = ltc_stride * ltc + lts_stride * slice;
+	ltc_intr3 = gk20a_readl(g, ltc_ltc0_lts0_intr3_r() +
+				offset);
+
+	/* Detect and handle ECC PARITY errors */
+	if (ltc_intr3 &
+		(ltc_ltcs_ltss_intr3_ecc_uncorrected_m() |
+		 ltc_ltcs_ltss_intr3_ecc_corrected_m())) {
+
+		ecc_status = gk20a_readl(g,
+			ltc_ltc0_lts0_l2_cache_ecc_status_r() +
+			offset);
+		ecc_addr = gk20a_readl(g,
+			ltc_ltc0_lts0_l2_cache_ecc_address_r() +
+			offset);
+		corrected_cnt = gk20a_readl(g,
+			ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset);
+		uncorrected_cnt = gk20a_readl(g,
+			ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset);
+
+		corrected_delta =
+			ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(corrected_cnt);
+		uncorrected_delta =
+			ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(uncorrected_cnt);
+		corrected_overflow = ecc_status &
+			ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m();
+
+		uncorrected_overflow = ecc_status &
+			ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m();
+
+		/* clear the interrupt */
+		if ((corrected_delta > 0U) || corrected_overflow) {
+			nvgpu_writel_check(g,
+				ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset, 0);
+		}
+		if ((uncorrected_delta > 0U) || uncorrected_overflow) {
+			nvgpu_writel_check(g,
+				ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset, 0);
+		}
+
+		nvgpu_writel_check(g,
+			ltc_ltc0_lts0_l2_cache_ecc_status_r() + offset,
+			ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f());
+
+		/* update counters per slice */
+		if (corrected_overflow) {
+			corrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s());
+		}
+		if (uncorrected_overflow) {
+			uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
+		}
+
+		g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta;
+		g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta;
+		nvgpu_log(g, gpu_dbg_intr,
+			"ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
+
+		if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) {
+			nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected");
+		}
+		if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m()) {
+			nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected");
+		}
+		if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) {
+			nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected");
+		}
+		if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m()) {
+			nvgpu_log(g, gpu_dbg_intr, "tstg ecc error uncorrected");
+		}
+		if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m()) {
+			nvgpu_log(g, gpu_dbg_intr, "dstg ecc error corrected");
+		}
+		if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m()) {
+			nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected");
+		}
+
+		if (corrected_overflow || uncorrected_overflow) {
+			nvgpu_info(g, "ecc counter overflow!");
+		}
+
+		nvgpu_log(g, gpu_dbg_intr,
+			"ecc error address: 0x%x", ecc_addr);
+	}
+
+	gp10b_ltc_lts_isr(g, ltc, slice);
+}
+
+void gv11b_ltc_isr(struct gk20a *g)
+{
+	u32 mc_intr;
+	unsigned int ltc, slice;
 
 	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
 	for (ltc = 0; ltc < g->ltc_count; ltc++) {
@@ -124,96 +216,7 @@ void gv11b_ltc_isr(struct gk20a *g)
 		}
 
 		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
-			u32 offset = ltc_stride * ltc + lts_stride * slice;
-			ltc_intr3 = gk20a_readl(g, ltc_ltc0_lts0_intr3_r() +
-						offset);
-
-			/* Detect and handle ECC PARITY errors */
-
-			if (ltc_intr3 &
-				(ltc_ltcs_ltss_intr3_ecc_uncorrected_m() |
-				 ltc_ltcs_ltss_intr3_ecc_corrected_m())) {
-
-				ecc_status = gk20a_readl(g,
-					ltc_ltc0_lts0_l2_cache_ecc_status_r() +
-					offset);
-				ecc_addr = gk20a_readl(g,
-					ltc_ltc0_lts0_l2_cache_ecc_address_r() +
-					offset);
-				corrected_cnt = gk20a_readl(g,
-					ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset);
-				uncorrected_cnt = gk20a_readl(g,
-					ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset);
-
-				corrected_delta =
-					ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(corrected_cnt);
-				uncorrected_delta =
-					ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(uncorrected_cnt);
-				corrected_overflow = ecc_status &
-					ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m();
-
-				uncorrected_overflow = ecc_status &
-					ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m();
-
-				/* clear the interrupt */
-				if ((corrected_delta > 0U) || corrected_overflow) {
-					nvgpu_writel_check(g,
-						ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset, 0);
-				}
-				if ((uncorrected_delta > 0U) || uncorrected_overflow) {
-					nvgpu_writel_check(g,
-						ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset, 0);
-				}
-
-				nvgpu_writel_check(g,
-					ltc_ltc0_lts0_l2_cache_ecc_status_r() + offset,
-					ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f());
-
-				/* update counters per slice */
-				if (corrected_overflow) {
-					corrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s());
-				}
-				if (uncorrected_overflow) {
-					uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
-				}
-
-				g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta;
-				g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta;
-				nvgpu_log(g, gpu_dbg_intr,
-					"ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
-
-				if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) {
-					nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected");
-				}
-				if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m()) {
-					nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected");
-				}
-				if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) {
-					nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected");
-				}
-				if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m()) {
-					nvgpu_log(g, gpu_dbg_intr, "tstg ecc error uncorrected");
-				}
-				if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m()) {
-					nvgpu_log(g, gpu_dbg_intr, "dstg ecc error corrected");
-				}
-				if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m()) {
-					nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected");
-				}
-
-				if (corrected_overflow || uncorrected_overflow) {
-					nvgpu_info(g, "ecc counter overflow!");
-				}
-
-				nvgpu_log(g, gpu_dbg_intr,
-					"ecc error address: 0x%x", ecc_addr);
-
-			}
-
+			gv11b_ltc_lts_isr(g, ltc, slice);
 		}
-
 	}
-
-	/* fallback to other interrupts */
-	gp10b_ltc_isr(g);
 }