diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c | 195 |
1 files changed, 99 insertions, 96 deletions
diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c index c5bf40c1..69c1ce02 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c | |||
@@ -106,16 +106,108 @@ void gv11b_ltc_intr_en_illegal_compstat(struct gk20a *g, bool enable) | |||
106 | gk20a_writel(g, ltc_ltcs_ltss_intr_r(), val); | 106 | gk20a_writel(g, ltc_ltcs_ltss_intr_r(), val); |
107 | } | 107 | } |
108 | 108 | ||
109 | 109 | void gv11b_ltc_lts_isr(struct gk20a *g, | |
110 | void gv11b_ltc_isr(struct gk20a *g) | 110 | unsigned int ltc, unsigned int slice) |
111 | { | 111 | { |
112 | u32 mc_intr, ltc_intr3; | 112 | u32 offset; |
113 | unsigned int ltc, slice; | 113 | u32 ltc_intr3; |
114 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
115 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
116 | u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; | 114 | u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; |
117 | u32 corrected_delta, uncorrected_delta; | 115 | u32 corrected_delta, uncorrected_delta; |
118 | u32 corrected_overflow, uncorrected_overflow; | 116 | u32 corrected_overflow, uncorrected_overflow; |
117 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
118 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
119 | |||
120 | offset = ltc_stride * ltc + lts_stride * slice; | ||
121 | ltc_intr3 = gk20a_readl(g, ltc_ltc0_lts0_intr3_r() + | ||
122 | offset); | ||
123 | |||
124 | /* Detect and handle ECC PARITY errors */ | ||
125 | if (ltc_intr3 & | ||
126 | (ltc_ltcs_ltss_intr3_ecc_uncorrected_m() | | ||
127 | ltc_ltcs_ltss_intr3_ecc_corrected_m())) { | ||
128 | |||
129 | ecc_status = gk20a_readl(g, | ||
130 | ltc_ltc0_lts0_l2_cache_ecc_status_r() + | ||
131 | offset); | ||
132 | ecc_addr = gk20a_readl(g, | ||
133 | ltc_ltc0_lts0_l2_cache_ecc_address_r() + | ||
134 | offset); | ||
135 | corrected_cnt = gk20a_readl(g, | ||
136 | ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset); | ||
137 | uncorrected_cnt = gk20a_readl(g, | ||
138 | ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset); | ||
139 | |||
140 | corrected_delta = | ||
141 | ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(corrected_cnt); | ||
142 | uncorrected_delta = | ||
143 | ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(uncorrected_cnt); | ||
144 | corrected_overflow = ecc_status & | ||
145 | ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m(); | ||
146 | |||
147 | uncorrected_overflow = ecc_status & | ||
148 | ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m(); | ||
149 | |||
150 | /* clear the interrupt */ | ||
151 | if ((corrected_delta > 0U) || corrected_overflow) { | ||
152 | nvgpu_writel_check(g, | ||
153 | ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset, 0); | ||
154 | } | ||
155 | if ((uncorrected_delta > 0U) || uncorrected_overflow) { | ||
156 | nvgpu_writel_check(g, | ||
157 | ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset, 0); | ||
158 | } | ||
159 | |||
160 | nvgpu_writel_check(g, | ||
161 | ltc_ltc0_lts0_l2_cache_ecc_status_r() + offset, | ||
162 | ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f()); | ||
163 | |||
164 | /* update counters per slice */ | ||
165 | if (corrected_overflow) { | ||
166 | corrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s()); | ||
167 | } | ||
168 | if (uncorrected_overflow) { | ||
169 | uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s()); | ||
170 | } | ||
171 | |||
172 | g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta; | ||
173 | g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta; | ||
174 | nvgpu_log(g, gpu_dbg_intr, | ||
175 | "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3); | ||
176 | |||
177 | if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) { | ||
178 | nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected"); | ||
179 | } | ||
180 | if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m()) { | ||
181 | nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected"); | ||
182 | } | ||
183 | if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) { | ||
184 | nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected"); | ||
185 | } | ||
186 | if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m()) { | ||
187 | nvgpu_log(g, gpu_dbg_intr, "tstg ecc error uncorrected"); | ||
188 | } | ||
189 | if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m()) { | ||
190 | nvgpu_log(g, gpu_dbg_intr, "dstg ecc error corrected"); | ||
191 | } | ||
192 | if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m()) { | ||
193 | nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected"); | ||
194 | } | ||
195 | |||
196 | if (corrected_overflow || uncorrected_overflow) { | ||
197 | nvgpu_info(g, "ecc counter overflow!"); | ||
198 | } | ||
199 | |||
200 | nvgpu_log(g, gpu_dbg_intr, | ||
201 | "ecc error address: 0x%x", ecc_addr); | ||
202 | } | ||
203 | |||
204 | gp10b_ltc_lts_isr(g, ltc, slice); | ||
205 | } | ||
206 | |||
207 | void gv11b_ltc_isr(struct gk20a *g) | ||
208 | { | ||
209 | u32 mc_intr; | ||
210 | unsigned int ltc, slice; | ||
119 | 211 | ||
120 | mc_intr = gk20a_readl(g, mc_intr_ltc_r()); | 212 | mc_intr = gk20a_readl(g, mc_intr_ltc_r()); |
121 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | 213 | for (ltc = 0; ltc < g->ltc_count; ltc++) { |
@@ -124,96 +216,7 @@ void gv11b_ltc_isr(struct gk20a *g) | |||
124 | } | 216 | } |
125 | 217 | ||
126 | for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { | 218 | for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { |
127 | u32 offset = ltc_stride * ltc + lts_stride * slice; | 219 | gv11b_ltc_lts_isr(g, ltc, slice); |
128 | ltc_intr3 = gk20a_readl(g, ltc_ltc0_lts0_intr3_r() + | ||
129 | offset); | ||
130 | |||
131 | /* Detect and handle ECC PARITY errors */ | ||
132 | |||
133 | if (ltc_intr3 & | ||
134 | (ltc_ltcs_ltss_intr3_ecc_uncorrected_m() | | ||
135 | ltc_ltcs_ltss_intr3_ecc_corrected_m())) { | ||
136 | |||
137 | ecc_status = gk20a_readl(g, | ||
138 | ltc_ltc0_lts0_l2_cache_ecc_status_r() + | ||
139 | offset); | ||
140 | ecc_addr = gk20a_readl(g, | ||
141 | ltc_ltc0_lts0_l2_cache_ecc_address_r() + | ||
142 | offset); | ||
143 | corrected_cnt = gk20a_readl(g, | ||
144 | ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset); | ||
145 | uncorrected_cnt = gk20a_readl(g, | ||
146 | ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset); | ||
147 | |||
148 | corrected_delta = | ||
149 | ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(corrected_cnt); | ||
150 | uncorrected_delta = | ||
151 | ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(uncorrected_cnt); | ||
152 | corrected_overflow = ecc_status & | ||
153 | ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m(); | ||
154 | |||
155 | uncorrected_overflow = ecc_status & | ||
156 | ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m(); | ||
157 | |||
158 | /* clear the interrupt */ | ||
159 | if ((corrected_delta > 0U) || corrected_overflow) { | ||
160 | nvgpu_writel_check(g, | ||
161 | ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset, 0); | ||
162 | } | ||
163 | if ((uncorrected_delta > 0U) || uncorrected_overflow) { | ||
164 | nvgpu_writel_check(g, | ||
165 | ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset, 0); | ||
166 | } | ||
167 | |||
168 | nvgpu_writel_check(g, | ||
169 | ltc_ltc0_lts0_l2_cache_ecc_status_r() + offset, | ||
170 | ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f()); | ||
171 | |||
172 | /* update counters per slice */ | ||
173 | if (corrected_overflow) { | ||
174 | corrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s()); | ||
175 | } | ||
176 | if (uncorrected_overflow) { | ||
177 | uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s()); | ||
178 | } | ||
179 | |||
180 | g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta; | ||
181 | g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta; | ||
182 | nvgpu_log(g, gpu_dbg_intr, | ||
183 | "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3); | ||
184 | |||
185 | if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) { | ||
186 | nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected"); | ||
187 | } | ||
188 | if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m()) { | ||
189 | nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected"); | ||
190 | } | ||
191 | if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) { | ||
192 | nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected"); | ||
193 | } | ||
194 | if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m()) { | ||
195 | nvgpu_log(g, gpu_dbg_intr, "tstg ecc error uncorrected"); | ||
196 | } | ||
197 | if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m()) { | ||
198 | nvgpu_log(g, gpu_dbg_intr, "dstg ecc error corrected"); | ||
199 | } | ||
200 | if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m()) { | ||
201 | nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected"); | ||
202 | } | ||
203 | |||
204 | if (corrected_overflow || uncorrected_overflow) { | ||
205 | nvgpu_info(g, "ecc counter overflow!"); | ||
206 | } | ||
207 | |||
208 | nvgpu_log(g, gpu_dbg_intr, | ||
209 | "ecc error address: 0x%x", ecc_addr); | ||
210 | |||
211 | } | ||
212 | |||
213 | } | 220 | } |
214 | |||
215 | } | 221 | } |
216 | |||
217 | /* fallback to other interrupts */ | ||
218 | gp10b_ltc_isr(g); | ||
219 | } | 222 | } |