summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
diff options
context:
space:
mode:
author: David Nieto <dmartineznie@nvidia.com> 2017-05-26 17:36:26 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-06-04 23:34:58 -0400
commit: 3dc28cb1ab934ebcda33933086d7d0ffc8d1f907 (patch)
tree: 4a27e428220d8083992c07fb905b430f91162cf8 /drivers/gpu/nvgpu/gv11b/fb_gv11b.c
parent: 345eaef6a76771da9c3e8a5e375fc9d659fb1b2b (diff)
gpu: nvgpu: add chip specific ECC counters
Add support for ECC counters for HUB MMU

JIRA: GPUT19X-82
Change-Id: I691d5898d4db9fe2cd68f217baa646479ab5cb00
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1490825
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/fb_gv11b.c')
-rw-r--r--drivers/gpu/nvgpu/gv11b/fb_gv11b.c203
1 files changed, 185 insertions, 18 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
index 975692a6..9a4ea36b 100644
--- a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
@@ -224,6 +224,185 @@ void gv11b_fb_disable_hub_intr(struct gk20a *g,
224 gv11b_fb_intr_en_clr(g, index, mask); 224 gv11b_fb_intr_en_clr(g, index, mask);
225} 225}
226 226
227static void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status)
228{
229 u32 ecc_addr, corrected_cnt, uncorrected_cnt;
230 u32 corrected_delta, uncorrected_delta;
231 u32 corrected_overflow, uncorrected_overflow;
232
233 ecc_addr = gk20a_readl(g, fb_mmu_l2tlb_ecc_address_r());
234 corrected_cnt = gk20a_readl(g,
235 fb_mmu_l2tlb_ecc_corrected_err_count_r());
236 uncorrected_cnt = gk20a_readl(g,
237 fb_mmu_l2tlb_ecc_uncorrected_err_count_r());
238
239 corrected_delta = fb_mmu_l2tlb_ecc_corrected_err_count_total_v(
240 corrected_cnt);
241 uncorrected_delta = fb_mmu_l2tlb_ecc_uncorrected_err_count_total_v(
242 uncorrected_cnt);
243 corrected_overflow = ecc_status &
244 fb_mmu_l2tlb_ecc_status_corrected_err_total_counter_overflow_m();
245
246 uncorrected_overflow = ecc_status &
247 fb_mmu_l2tlb_ecc_status_uncorrected_err_total_counter_overflow_m();
248
249 /* clear the interrupt */
250 if ((corrected_delta > 0) || corrected_overflow)
251 gk20a_writel(g, fb_mmu_l2tlb_ecc_corrected_err_count_r(), 0);
252 if ((uncorrected_delta > 0) || uncorrected_overflow)
253 gk20a_writel(g, fb_mmu_l2tlb_ecc_uncorrected_err_count_r(), 0);
254
255 gk20a_writel(g, fb_mmu_l2tlb_ecc_status_r(),
256 fb_mmu_l2tlb_ecc_status_reset_clear_f());
257
258 /* Handle overflow */
259 if (corrected_overflow)
260 corrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_corrected_err_count_total_s());
261 if (uncorrected_overflow)
262 uncorrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_uncorrected_err_count_total_s());
263
264
265 g->ecc.eng.t19x.mmu_l2tlb_corrected_err_count.counters[0] +=
266 corrected_delta;
267 g->ecc.eng.t19x.mmu_l2tlb_uncorrected_err_count.counters[0] +=
268 uncorrected_delta;
269
270 if (ecc_status & fb_mmu_l2tlb_ecc_status_corrected_err_l2tlb_sa_data_m())
271 nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error");
272 if (ecc_status & fb_mmu_l2tlb_ecc_status_uncorrected_err_l2tlb_sa_data_m())
273 nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error");
274 if (corrected_overflow || uncorrected_overflow)
275 nvgpu_info(g, "mmu l2tlb ecc counter overflow!");
276
277 nvgpu_log(g, gpu_dbg_intr,
278 "ecc error address: 0x%x", ecc_addr);
279 nvgpu_log(g, gpu_dbg_intr,
280 "ecc error count corrected: %d, uncorrected %d",
281 g->ecc.eng.t19x.mmu_l2tlb_corrected_err_count.counters[0],
282 g->ecc.eng.t19x.mmu_l2tlb_uncorrected_err_count.counters[0]);
283}
284
285static void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
286{
287 u32 ecc_addr, corrected_cnt, uncorrected_cnt;
288 u32 corrected_delta, uncorrected_delta;
289 u32 corrected_overflow, uncorrected_overflow;
290
291 ecc_addr = gk20a_readl(g, fb_mmu_hubtlb_ecc_address_r());
292 corrected_cnt = gk20a_readl(g,
293 fb_mmu_hubtlb_ecc_corrected_err_count_r());
294 uncorrected_cnt = gk20a_readl(g,
295 fb_mmu_hubtlb_ecc_uncorrected_err_count_r());
296
297 corrected_delta = fb_mmu_hubtlb_ecc_corrected_err_count_total_v(
298 corrected_cnt);
299 uncorrected_delta = fb_mmu_hubtlb_ecc_uncorrected_err_count_total_v(
300 uncorrected_cnt);
301 corrected_overflow = ecc_status &
302 fb_mmu_hubtlb_ecc_status_corrected_err_total_counter_overflow_m();
303
304 uncorrected_overflow = ecc_status &
305 fb_mmu_hubtlb_ecc_status_uncorrected_err_total_counter_overflow_m();
306
307 /* clear the interrupt */
308 if ((corrected_delta > 0) || corrected_overflow)
309 gk20a_writel(g, fb_mmu_hubtlb_ecc_corrected_err_count_r(), 0);
310 if ((uncorrected_delta > 0) || uncorrected_overflow)
311 gk20a_writel(g, fb_mmu_hubtlb_ecc_uncorrected_err_count_r(), 0);
312
313 gk20a_writel(g, fb_mmu_hubtlb_ecc_status_r(),
314 fb_mmu_hubtlb_ecc_status_reset_clear_f());
315
316 /* Handle overflow */
317 if (corrected_overflow)
318 corrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_corrected_err_count_total_s());
319 if (uncorrected_overflow)
320 uncorrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_uncorrected_err_count_total_s());
321
322
323 g->ecc.eng.t19x.mmu_hubtlb_corrected_err_count.counters[0] +=
324 corrected_delta;
325 g->ecc.eng.t19x.mmu_hubtlb_uncorrected_err_count.counters[0] +=
326 uncorrected_delta;
327
328 if (ecc_status & fb_mmu_hubtlb_ecc_status_corrected_err_sa_data_m())
329 nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error");
330 if (ecc_status & fb_mmu_hubtlb_ecc_status_uncorrected_err_sa_data_m())
331 nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error");
332 if (corrected_overflow || uncorrected_overflow)
333 nvgpu_info(g, "mmu hubtlb ecc counter overflow!");
334
335 nvgpu_log(g, gpu_dbg_intr,
336 "ecc error address: 0x%x", ecc_addr);
337 nvgpu_log(g, gpu_dbg_intr,
338 "ecc error count corrected: %d, uncorrected %d",
339 g->ecc.eng.t19x.mmu_hubtlb_corrected_err_count.counters[0],
340 g->ecc.eng.t19x.mmu_hubtlb_uncorrected_err_count.counters[0]);
341}
342
343static void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
344{
345 u32 ecc_addr, corrected_cnt, uncorrected_cnt;
346 u32 corrected_delta, uncorrected_delta;
347 u32 corrected_overflow, uncorrected_overflow;
348
349 ecc_addr = gk20a_readl(g, fb_mmu_fillunit_ecc_address_r());
350 corrected_cnt = gk20a_readl(g,
351 fb_mmu_fillunit_ecc_corrected_err_count_r());
352 uncorrected_cnt = gk20a_readl(g,
353 fb_mmu_fillunit_ecc_uncorrected_err_count_r());
354
355 corrected_delta = fb_mmu_fillunit_ecc_corrected_err_count_total_v(
356 corrected_cnt);
357 uncorrected_delta = fb_mmu_fillunit_ecc_uncorrected_err_count_total_v(
358 uncorrected_cnt);
359 corrected_overflow = ecc_status &
360 fb_mmu_fillunit_ecc_status_corrected_err_total_counter_overflow_m();
361
362 uncorrected_overflow = ecc_status &
363 fb_mmu_fillunit_ecc_status_uncorrected_err_total_counter_overflow_m();
364
365 /* clear the interrupt */
366 if ((corrected_delta > 0) || corrected_overflow)
367 gk20a_writel(g, fb_mmu_fillunit_ecc_corrected_err_count_r(), 0);
368 if ((uncorrected_delta > 0) || uncorrected_overflow)
369 gk20a_writel(g, fb_mmu_fillunit_ecc_uncorrected_err_count_r(), 0);
370
371 gk20a_writel(g, fb_mmu_fillunit_ecc_status_r(),
372 fb_mmu_fillunit_ecc_status_reset_clear_f());
373
374 /* Handle overflow */
375 if (corrected_overflow)
376 corrected_delta += (0x1UL << fb_mmu_fillunit_ecc_corrected_err_count_total_s());
377 if (uncorrected_overflow)
378 uncorrected_delta += (0x1UL << fb_mmu_fillunit_ecc_uncorrected_err_count_total_s());
379
380
381 g->ecc.eng.t19x.mmu_fillunit_corrected_err_count.counters[0] +=
382 corrected_delta;
383 g->ecc.eng.t19x.mmu_fillunit_uncorrected_err_count.counters[0] +=
384 uncorrected_delta;
385
386 if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pte_data_m())
387 nvgpu_log(g, gpu_dbg_intr, "corrected ecc pte data error");
388 if (ecc_status & fb_mmu_fillunit_ecc_status_uncorrected_err_pte_data_m())
389 nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc pte data error");
390 if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pde0_data_m())
391 nvgpu_log(g, gpu_dbg_intr, "corrected ecc pde0 data error");
392 if (ecc_status & fb_mmu_fillunit_ecc_status_uncorrected_err_pde0_data_m())
393 nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc pde0 data error");
394
395 if (corrected_overflow || uncorrected_overflow)
396 nvgpu_info(g, "mmu fillunit ecc counter overflow!");
397
398 nvgpu_log(g, gpu_dbg_intr,
399 "ecc error address: 0x%x", ecc_addr);
400 nvgpu_log(g, gpu_dbg_intr,
401 "ecc error count corrected: %d, uncorrected %d",
402 g->ecc.eng.t19x.mmu_fillunit_corrected_err_count.counters[0],
403 g->ecc.eng.t19x.mmu_fillunit_uncorrected_err_count.counters[0]);
404}
405
227static void gv11b_fb_hub_isr(struct gk20a *g) 406static void gv11b_fb_hub_isr(struct gk20a *g)
228{ 407{
229 u32 status; 408 u32 status;
@@ -246,28 +425,16 @@ static void gv11b_fb_hub_isr(struct gk20a *g)
246 HUB_INTR_TYPE_ECC_UNCORRECTED); 425 HUB_INTR_TYPE_ECC_UNCORRECTED);
247 426
248 status = gk20a_readl(g, fb_mmu_l2tlb_ecc_status_r()); 427 status = gk20a_readl(g, fb_mmu_l2tlb_ecc_status_r());
249 if (status) { 428 if (status)
250 nvgpu_info(g, "hub mmu L2 ecc status: 0x%x", 429 gv11b_handle_l2tlb_ecc_isr(g, status);
251 status);
252 gk20a_writel(g, fb_mmu_l2tlb_ecc_status_r(),
253 fb_mmu_l2tlb_ecc_status_reset_clear_f());
254 }
255 430
256 status = gk20a_readl(g, fb_mmu_hubtlb_ecc_status_r()); 431 status = gk20a_readl(g, fb_mmu_hubtlb_ecc_status_r());
257 if (status) { 432 if (status)
258 nvgpu_info(g, "hub mmu hub tlb ecc status: 0x%x", 433 gv11b_handle_hubtlb_ecc_isr(g, status);
259 status);
260 gk20a_writel(g, fb_mmu_hubtlb_ecc_status_r(),
261 fb_mmu_hubtlb_ecc_status_reset_clear_f());
262 }
263 434
264 status = gk20a_readl(g, fb_mmu_fillunit_ecc_status_r()); 435 status = gk20a_readl(g, fb_mmu_fillunit_ecc_status_r());
265 if (status) { 436 if (status)
266 nvgpu_info(g, "hub mmu fill unit ecc status: 0x%x", 437 gv11b_handle_fillunit_ecc_isr(g, status);
267 status);
268 gk20a_writel(g, fb_mmu_fillunit_ecc_status_r(),
269 fb_mmu_fillunit_ecc_status_reset_clear_f());
270 }
271 438
272 /* re-enable interrupts after handling */ 439 /* re-enable interrupts after handling */
273 gv11b_fb_enable_hub_intr(g, STALL_REG_INDEX, 440 gv11b_fb_enable_hub_intr(g, STALL_REG_INDEX,