diff options
author | David Nieto <dmartineznie@nvidia.com> | 2017-05-26 17:36:26 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-06-04 23:34:58 -0400 |
commit | 3dc28cb1ab934ebcda33933086d7d0ffc8d1f907 (patch) | |
tree | 4a27e428220d8083992c07fb905b430f91162cf8 /drivers/gpu/nvgpu/gv11b/fb_gv11b.c | |
parent | 345eaef6a76771da9c3e8a5e375fc9d659fb1b2b (diff) |
gpu: nvgpu: add chip specific ECC counters
Add support for ECC counters for HUB MMU
JIRA: GPUT19X-82
Change-Id: I691d5898d4db9fe2cd68f217baa646479ab5cb00
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1490825
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/fb_gv11b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/fb_gv11b.c | 203 |
1 files changed, 185 insertions, 18 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c index 975692a6..9a4ea36b 100644 --- a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c | |||
@@ -224,6 +224,185 @@ void gv11b_fb_disable_hub_intr(struct gk20a *g, | |||
224 | gv11b_fb_intr_en_clr(g, index, mask); | 224 | gv11b_fb_intr_en_clr(g, index, mask); |
225 | } | 225 | } |
226 | 226 | ||
227 | static void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status) | ||
228 | { | ||
229 | u32 ecc_addr, corrected_cnt, uncorrected_cnt; | ||
230 | u32 corrected_delta, uncorrected_delta; | ||
231 | u32 corrected_overflow, uncorrected_overflow; | ||
232 | |||
233 | ecc_addr = gk20a_readl(g, fb_mmu_l2tlb_ecc_address_r()); | ||
234 | corrected_cnt = gk20a_readl(g, | ||
235 | fb_mmu_l2tlb_ecc_corrected_err_count_r()); | ||
236 | uncorrected_cnt = gk20a_readl(g, | ||
237 | fb_mmu_l2tlb_ecc_uncorrected_err_count_r()); | ||
238 | |||
239 | corrected_delta = fb_mmu_l2tlb_ecc_corrected_err_count_total_v( | ||
240 | corrected_cnt); | ||
241 | uncorrected_delta = fb_mmu_l2tlb_ecc_uncorrected_err_count_total_v( | ||
242 | uncorrected_cnt); | ||
243 | corrected_overflow = ecc_status & | ||
244 | fb_mmu_l2tlb_ecc_status_corrected_err_total_counter_overflow_m(); | ||
245 | |||
246 | uncorrected_overflow = ecc_status & | ||
247 | fb_mmu_l2tlb_ecc_status_uncorrected_err_total_counter_overflow_m(); | ||
248 | |||
249 | /* clear the interrupt */ | ||
250 | if ((corrected_delta > 0) || corrected_overflow) | ||
251 | gk20a_writel(g, fb_mmu_l2tlb_ecc_corrected_err_count_r(), 0); | ||
252 | if ((uncorrected_delta > 0) || uncorrected_overflow) | ||
253 | gk20a_writel(g, fb_mmu_l2tlb_ecc_uncorrected_err_count_r(), 0); | ||
254 | |||
255 | gk20a_writel(g, fb_mmu_l2tlb_ecc_status_r(), | ||
256 | fb_mmu_l2tlb_ecc_status_reset_clear_f()); | ||
257 | |||
258 | /* Handle overflow */ | ||
259 | if (corrected_overflow) | ||
260 | corrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_corrected_err_count_total_s()); | ||
261 | if (uncorrected_overflow) | ||
262 | uncorrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_uncorrected_err_count_total_s()); | ||
263 | |||
264 | |||
265 | g->ecc.eng.t19x.mmu_l2tlb_corrected_err_count.counters[0] += | ||
266 | corrected_delta; | ||
267 | g->ecc.eng.t19x.mmu_l2tlb_uncorrected_err_count.counters[0] += | ||
268 | uncorrected_delta; | ||
269 | |||
270 | if (ecc_status & fb_mmu_l2tlb_ecc_status_corrected_err_l2tlb_sa_data_m()) | ||
271 | nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error"); | ||
272 | if (ecc_status & fb_mmu_l2tlb_ecc_status_uncorrected_err_l2tlb_sa_data_m()) | ||
273 | nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error"); | ||
274 | if (corrected_overflow || uncorrected_overflow) | ||
275 | nvgpu_info(g, "mmu l2tlb ecc counter overflow!"); | ||
276 | |||
277 | nvgpu_log(g, gpu_dbg_intr, | ||
278 | "ecc error address: 0x%x", ecc_addr); | ||
279 | nvgpu_log(g, gpu_dbg_intr, | ||
280 | "ecc error count corrected: %d, uncorrected %d", | ||
281 | g->ecc.eng.t19x.mmu_l2tlb_corrected_err_count.counters[0], | ||
282 | g->ecc.eng.t19x.mmu_l2tlb_uncorrected_err_count.counters[0]); | ||
283 | } | ||
284 | |||
285 | static void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status) | ||
286 | { | ||
287 | u32 ecc_addr, corrected_cnt, uncorrected_cnt; | ||
288 | u32 corrected_delta, uncorrected_delta; | ||
289 | u32 corrected_overflow, uncorrected_overflow; | ||
290 | |||
291 | ecc_addr = gk20a_readl(g, fb_mmu_hubtlb_ecc_address_r()); | ||
292 | corrected_cnt = gk20a_readl(g, | ||
293 | fb_mmu_hubtlb_ecc_corrected_err_count_r()); | ||
294 | uncorrected_cnt = gk20a_readl(g, | ||
295 | fb_mmu_hubtlb_ecc_uncorrected_err_count_r()); | ||
296 | |||
297 | corrected_delta = fb_mmu_hubtlb_ecc_corrected_err_count_total_v( | ||
298 | corrected_cnt); | ||
299 | uncorrected_delta = fb_mmu_hubtlb_ecc_uncorrected_err_count_total_v( | ||
300 | uncorrected_cnt); | ||
301 | corrected_overflow = ecc_status & | ||
302 | fb_mmu_hubtlb_ecc_status_corrected_err_total_counter_overflow_m(); | ||
303 | |||
304 | uncorrected_overflow = ecc_status & | ||
305 | fb_mmu_hubtlb_ecc_status_uncorrected_err_total_counter_overflow_m(); | ||
306 | |||
307 | /* clear the interrupt */ | ||
308 | if ((corrected_delta > 0) || corrected_overflow) | ||
309 | gk20a_writel(g, fb_mmu_hubtlb_ecc_corrected_err_count_r(), 0); | ||
310 | if ((uncorrected_delta > 0) || uncorrected_overflow) | ||
311 | gk20a_writel(g, fb_mmu_hubtlb_ecc_uncorrected_err_count_r(), 0); | ||
312 | |||
313 | gk20a_writel(g, fb_mmu_hubtlb_ecc_status_r(), | ||
314 | fb_mmu_hubtlb_ecc_status_reset_clear_f()); | ||
315 | |||
316 | /* Handle overflow */ | ||
317 | if (corrected_overflow) | ||
318 | corrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_corrected_err_count_total_s()); | ||
319 | if (uncorrected_overflow) | ||
320 | uncorrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_uncorrected_err_count_total_s()); | ||
321 | |||
322 | |||
323 | g->ecc.eng.t19x.mmu_hubtlb_corrected_err_count.counters[0] += | ||
324 | corrected_delta; | ||
325 | g->ecc.eng.t19x.mmu_hubtlb_uncorrected_err_count.counters[0] += | ||
326 | uncorrected_delta; | ||
327 | |||
328 | if (ecc_status & fb_mmu_hubtlb_ecc_status_corrected_err_sa_data_m()) | ||
329 | nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error"); | ||
330 | if (ecc_status & fb_mmu_hubtlb_ecc_status_uncorrected_err_sa_data_m()) | ||
331 | nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error"); | ||
332 | if (corrected_overflow || uncorrected_overflow) | ||
333 | nvgpu_info(g, "mmu hubtlb ecc counter overflow!"); | ||
334 | |||
335 | nvgpu_log(g, gpu_dbg_intr, | ||
336 | "ecc error address: 0x%x", ecc_addr); | ||
337 | nvgpu_log(g, gpu_dbg_intr, | ||
338 | "ecc error count corrected: %d, uncorrected %d", | ||
339 | g->ecc.eng.t19x.mmu_hubtlb_corrected_err_count.counters[0], | ||
340 | g->ecc.eng.t19x.mmu_hubtlb_uncorrected_err_count.counters[0]); | ||
341 | } | ||
342 | |||
343 | static void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status) | ||
344 | { | ||
345 | u32 ecc_addr, corrected_cnt, uncorrected_cnt; | ||
346 | u32 corrected_delta, uncorrected_delta; | ||
347 | u32 corrected_overflow, uncorrected_overflow; | ||
348 | |||
349 | ecc_addr = gk20a_readl(g, fb_mmu_fillunit_ecc_address_r()); | ||
350 | corrected_cnt = gk20a_readl(g, | ||
351 | fb_mmu_fillunit_ecc_corrected_err_count_r()); | ||
352 | uncorrected_cnt = gk20a_readl(g, | ||
353 | fb_mmu_fillunit_ecc_uncorrected_err_count_r()); | ||
354 | |||
355 | corrected_delta = fb_mmu_fillunit_ecc_corrected_err_count_total_v( | ||
356 | corrected_cnt); | ||
357 | uncorrected_delta = fb_mmu_fillunit_ecc_uncorrected_err_count_total_v( | ||
358 | uncorrected_cnt); | ||
359 | corrected_overflow = ecc_status & | ||
360 | fb_mmu_fillunit_ecc_status_corrected_err_total_counter_overflow_m(); | ||
361 | |||
362 | uncorrected_overflow = ecc_status & | ||
363 | fb_mmu_fillunit_ecc_status_uncorrected_err_total_counter_overflow_m(); | ||
364 | |||
365 | /* clear the interrupt */ | ||
366 | if ((corrected_delta > 0) || corrected_overflow) | ||
367 | gk20a_writel(g, fb_mmu_fillunit_ecc_corrected_err_count_r(), 0); | ||
368 | if ((uncorrected_delta > 0) || uncorrected_overflow) | ||
369 | gk20a_writel(g, fb_mmu_fillunit_ecc_uncorrected_err_count_r(), 0); | ||
370 | |||
371 | gk20a_writel(g, fb_mmu_fillunit_ecc_status_r(), | ||
372 | fb_mmu_fillunit_ecc_status_reset_clear_f()); | ||
373 | |||
374 | /* Handle overflow */ | ||
375 | if (corrected_overflow) | ||
376 | corrected_delta += (0x1UL << fb_mmu_fillunit_ecc_corrected_err_count_total_s()); | ||
377 | if (uncorrected_overflow) | ||
378 | uncorrected_delta += (0x1UL << fb_mmu_fillunit_ecc_uncorrected_err_count_total_s()); | ||
379 | |||
380 | |||
381 | g->ecc.eng.t19x.mmu_fillunit_corrected_err_count.counters[0] += | ||
382 | corrected_delta; | ||
383 | g->ecc.eng.t19x.mmu_fillunit_uncorrected_err_count.counters[0] += | ||
384 | uncorrected_delta; | ||
385 | |||
386 | if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pte_data_m()) | ||
387 | nvgpu_log(g, gpu_dbg_intr, "corrected ecc pte data error"); | ||
388 | if (ecc_status & fb_mmu_fillunit_ecc_status_uncorrected_err_pte_data_m()) | ||
389 | nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc pte data error"); | ||
390 | if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pde0_data_m()) | ||
391 | nvgpu_log(g, gpu_dbg_intr, "corrected ecc pde0 data error"); | ||
392 | if (ecc_status & fb_mmu_fillunit_ecc_status_uncorrected_err_pde0_data_m()) | ||
393 | nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc pde0 data error"); | ||
394 | |||
395 | if (corrected_overflow || uncorrected_overflow) | ||
396 | nvgpu_info(g, "mmu fillunit ecc counter overflow!"); | ||
397 | |||
398 | nvgpu_log(g, gpu_dbg_intr, | ||
399 | "ecc error address: 0x%x", ecc_addr); | ||
400 | nvgpu_log(g, gpu_dbg_intr, | ||
401 | "ecc error count corrected: %d, uncorrected %d", | ||
402 | g->ecc.eng.t19x.mmu_fillunit_corrected_err_count.counters[0], | ||
403 | g->ecc.eng.t19x.mmu_fillunit_uncorrected_err_count.counters[0]); | ||
404 | } | ||
405 | |||
227 | static void gv11b_fb_hub_isr(struct gk20a *g) | 406 | static void gv11b_fb_hub_isr(struct gk20a *g) |
228 | { | 407 | { |
229 | u32 status; | 408 | u32 status; |
@@ -246,28 +425,16 @@ static void gv11b_fb_hub_isr(struct gk20a *g) | |||
246 | HUB_INTR_TYPE_ECC_UNCORRECTED); | 425 | HUB_INTR_TYPE_ECC_UNCORRECTED); |
247 | 426 | ||
248 | status = gk20a_readl(g, fb_mmu_l2tlb_ecc_status_r()); | 427 | status = gk20a_readl(g, fb_mmu_l2tlb_ecc_status_r()); |
249 | if (status) { | 428 | if (status) |
250 | nvgpu_info(g, "hub mmu L2 ecc status: 0x%x", | 429 | gv11b_handle_l2tlb_ecc_isr(g, status); |
251 | status); | ||
252 | gk20a_writel(g, fb_mmu_l2tlb_ecc_status_r(), | ||
253 | fb_mmu_l2tlb_ecc_status_reset_clear_f()); | ||
254 | } | ||
255 | 430 | ||
256 | status = gk20a_readl(g, fb_mmu_hubtlb_ecc_status_r()); | 431 | status = gk20a_readl(g, fb_mmu_hubtlb_ecc_status_r()); |
257 | if (status) { | 432 | if (status) |
258 | nvgpu_info(g, "hub mmu hub tlb ecc status: 0x%x", | 433 | gv11b_handle_hubtlb_ecc_isr(g, status); |
259 | status); | ||
260 | gk20a_writel(g, fb_mmu_hubtlb_ecc_status_r(), | ||
261 | fb_mmu_hubtlb_ecc_status_reset_clear_f()); | ||
262 | } | ||
263 | 434 | ||
264 | status = gk20a_readl(g, fb_mmu_fillunit_ecc_status_r()); | 435 | status = gk20a_readl(g, fb_mmu_fillunit_ecc_status_r()); |
265 | if (status) { | 436 | if (status) |
266 | nvgpu_info(g, "hub mmu fill unit ecc status: 0x%x", | 437 | gv11b_handle_fillunit_ecc_isr(g, status); |
267 | status); | ||
268 | gk20a_writel(g, fb_mmu_fillunit_ecc_status_r(), | ||
269 | fb_mmu_fillunit_ecc_status_reset_clear_f()); | ||
270 | } | ||
271 | 438 | ||
272 | /* re-enable interrupts after handling */ | 439 | /* re-enable interrupts after handling */ |
273 | gv11b_fb_enable_hub_intr(g, STALL_REG_INDEX, | 440 | gv11b_fb_enable_hub_intr(g, STALL_REG_INDEX, |