Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
 drivers/gpu/nvgpu/gv11b/ecc_gv11b.h            |  12
 drivers/gpu/nvgpu/gv11b/fb_gv11b.c             | 203
 drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c |  80
 3 files changed, 277 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
index 70b1bab8..10750426 100644
--- a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
@@ -40,4 +40,16 @@ struct ecc_ltc_t19x {
 	struct gk20a_ecc_stat l2_cache_uncorrected_err_count;
 };
 
+/* TODO: PMU and FB ECC features are still under embargo */
+struct ecc_eng_t19x {
+	/* FB */
+	struct gk20a_ecc_stat mmu_l2tlb_corrected_err_count;
+	struct gk20a_ecc_stat mmu_l2tlb_uncorrected_err_count;
+	struct gk20a_ecc_stat mmu_hubtlb_corrected_err_count;
+	struct gk20a_ecc_stat mmu_hubtlb_uncorrected_err_count;
+	struct gk20a_ecc_stat mmu_fillunit_corrected_err_count;
+	struct gk20a_ecc_stat mmu_fillunit_uncorrected_err_count;
+	/* PMU */
+};
+
 #endif
diff --git a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
index 975692a6..9a4ea36b 100644
--- a/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fb_gv11b.c
@@ -224,6 +224,185 @@ void gv11b_fb_disable_hub_intr(struct gk20a *g,
 	gv11b_fb_intr_en_clr(g, index, mask);
 }
 
+static void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status)
+{
+	u32 ecc_addr, corrected_cnt, uncorrected_cnt;
+	u32 corrected_delta, uncorrected_delta;
+	u32 corrected_overflow, uncorrected_overflow;
+
+	ecc_addr = gk20a_readl(g, fb_mmu_l2tlb_ecc_address_r());
+	corrected_cnt = gk20a_readl(g,
+		fb_mmu_l2tlb_ecc_corrected_err_count_r());
+	uncorrected_cnt = gk20a_readl(g,
+		fb_mmu_l2tlb_ecc_uncorrected_err_count_r());
+
+	corrected_delta = fb_mmu_l2tlb_ecc_corrected_err_count_total_v(
+		corrected_cnt);
+	uncorrected_delta = fb_mmu_l2tlb_ecc_uncorrected_err_count_total_v(
+		uncorrected_cnt);
+	corrected_overflow = ecc_status &
+		fb_mmu_l2tlb_ecc_status_corrected_err_total_counter_overflow_m();
+
+	uncorrected_overflow = ecc_status &
+		fb_mmu_l2tlb_ecc_status_uncorrected_err_total_counter_overflow_m();
+
+	/* clear the interrupt */
+	if ((corrected_delta > 0) || corrected_overflow)
+		gk20a_writel(g, fb_mmu_l2tlb_ecc_corrected_err_count_r(), 0);
+	if ((uncorrected_delta > 0) || uncorrected_overflow)
+		gk20a_writel(g, fb_mmu_l2tlb_ecc_uncorrected_err_count_r(), 0);
+
+	gk20a_writel(g, fb_mmu_l2tlb_ecc_status_r(),
+		fb_mmu_l2tlb_ecc_status_reset_clear_f());
+
+	/* Handle overflow */
+	if (corrected_overflow)
+		corrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_corrected_err_count_total_s());
+	if (uncorrected_overflow)
+		uncorrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_uncorrected_err_count_total_s());
+
+
+	g->ecc.eng.t19x.mmu_l2tlb_corrected_err_count.counters[0] +=
+		corrected_delta;
+	g->ecc.eng.t19x.mmu_l2tlb_uncorrected_err_count.counters[0] +=
+		uncorrected_delta;
+
+	if (ecc_status & fb_mmu_l2tlb_ecc_status_corrected_err_l2tlb_sa_data_m())
+		nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error");
+	if (ecc_status & fb_mmu_l2tlb_ecc_status_uncorrected_err_l2tlb_sa_data_m())
+		nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error");
+	if (corrected_overflow || uncorrected_overflow)
+		nvgpu_info(g, "mmu l2tlb ecc counter overflow!");
+
+	nvgpu_log(g, gpu_dbg_intr,
+		"ecc error address: 0x%x", ecc_addr);
+	nvgpu_log(g, gpu_dbg_intr,
+		"ecc error count corrected: %d, uncorrected %d",
+		g->ecc.eng.t19x.mmu_l2tlb_corrected_err_count.counters[0],
+		g->ecc.eng.t19x.mmu_l2tlb_uncorrected_err_count.counters[0]);
+}
+
+static void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
+{
+	u32 ecc_addr, corrected_cnt, uncorrected_cnt;
+	u32 corrected_delta, uncorrected_delta;
+	u32 corrected_overflow, uncorrected_overflow;
+
+	ecc_addr = gk20a_readl(g, fb_mmu_hubtlb_ecc_address_r());
+	corrected_cnt = gk20a_readl(g,
+		fb_mmu_hubtlb_ecc_corrected_err_count_r());
+	uncorrected_cnt = gk20a_readl(g,
+		fb_mmu_hubtlb_ecc_uncorrected_err_count_r());
+
+	corrected_delta = fb_mmu_hubtlb_ecc_corrected_err_count_total_v(
+		corrected_cnt);
+	uncorrected_delta = fb_mmu_hubtlb_ecc_uncorrected_err_count_total_v(
+		uncorrected_cnt);
+	corrected_overflow = ecc_status &
+		fb_mmu_hubtlb_ecc_status_corrected_err_total_counter_overflow_m();
+
+	uncorrected_overflow = ecc_status &
+		fb_mmu_hubtlb_ecc_status_uncorrected_err_total_counter_overflow_m();
+
+	/* clear the interrupt */
+	if ((corrected_delta > 0) || corrected_overflow)
+		gk20a_writel(g, fb_mmu_hubtlb_ecc_corrected_err_count_r(), 0);
+	if ((uncorrected_delta > 0) || uncorrected_overflow)
+		gk20a_writel(g, fb_mmu_hubtlb_ecc_uncorrected_err_count_r(), 0);
+
+	gk20a_writel(g, fb_mmu_hubtlb_ecc_status_r(),
+		fb_mmu_hubtlb_ecc_status_reset_clear_f());
+
+	/* Handle overflow */
+	if (corrected_overflow)
+		corrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_corrected_err_count_total_s());
+	if (uncorrected_overflow)
+		uncorrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_uncorrected_err_count_total_s());
+
+
+	g->ecc.eng.t19x.mmu_hubtlb_corrected_err_count.counters[0] +=
+		corrected_delta;
+	g->ecc.eng.t19x.mmu_hubtlb_uncorrected_err_count.counters[0] +=
+		uncorrected_delta;
+
+	if (ecc_status & fb_mmu_hubtlb_ecc_status_corrected_err_sa_data_m())
+		nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error");
+	if (ecc_status & fb_mmu_hubtlb_ecc_status_uncorrected_err_sa_data_m())
+		nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error");
+	if (corrected_overflow || uncorrected_overflow)
+		nvgpu_info(g, "mmu hubtlb ecc counter overflow!");
+
+	nvgpu_log(g, gpu_dbg_intr,
+		"ecc error address: 0x%x", ecc_addr);
+	nvgpu_log(g, gpu_dbg_intr,
+		"ecc error count corrected: %d, uncorrected %d",
+		g->ecc.eng.t19x.mmu_hubtlb_corrected_err_count.counters[0],
+		g->ecc.eng.t19x.mmu_hubtlb_uncorrected_err_count.counters[0]);
+}
+
+static void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
+{
+	u32 ecc_addr, corrected_cnt, uncorrected_cnt;
+	u32 corrected_delta, uncorrected_delta;
+	u32 corrected_overflow, uncorrected_overflow;
+
+	ecc_addr = gk20a_readl(g, fb_mmu_fillunit_ecc_address_r());
+	corrected_cnt = gk20a_readl(g,
+		fb_mmu_fillunit_ecc_corrected_err_count_r());
+	uncorrected_cnt = gk20a_readl(g,
+		fb_mmu_fillunit_ecc_uncorrected_err_count_r());
+
+	corrected_delta = fb_mmu_fillunit_ecc_corrected_err_count_total_v(
+		corrected_cnt);
+	uncorrected_delta = fb_mmu_fillunit_ecc_uncorrected_err_count_total_v(
+		uncorrected_cnt);
+	corrected_overflow = ecc_status &
+		fb_mmu_fillunit_ecc_status_corrected_err_total_counter_overflow_m();
+
+	uncorrected_overflow = ecc_status &
+		fb_mmu_fillunit_ecc_status_uncorrected_err_total_counter_overflow_m();
+
+	/* clear the interrupt */
+	if ((corrected_delta > 0) || corrected_overflow)
+		gk20a_writel(g, fb_mmu_fillunit_ecc_corrected_err_count_r(), 0);
+	if ((uncorrected_delta > 0) || uncorrected_overflow)
+		gk20a_writel(g, fb_mmu_fillunit_ecc_uncorrected_err_count_r(), 0);
+
+	gk20a_writel(g, fb_mmu_fillunit_ecc_status_r(),
+		fb_mmu_fillunit_ecc_status_reset_clear_f());
+
+	/* Handle overflow */
+	if (corrected_overflow)
+		corrected_delta += (0x1UL << fb_mmu_fillunit_ecc_corrected_err_count_total_s());
+	if (uncorrected_overflow)
+		uncorrected_delta += (0x1UL << fb_mmu_fillunit_ecc_uncorrected_err_count_total_s());
+
+
+	g->ecc.eng.t19x.mmu_fillunit_corrected_err_count.counters[0] +=
+		corrected_delta;
+	g->ecc.eng.t19x.mmu_fillunit_uncorrected_err_count.counters[0] +=
+		uncorrected_delta;
+
+	if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pte_data_m())
+		nvgpu_log(g, gpu_dbg_intr, "corrected ecc pte data error");
+	if (ecc_status & fb_mmu_fillunit_ecc_status_uncorrected_err_pte_data_m())
+		nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc pte data error");
+	if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pde0_data_m())
+		nvgpu_log(g, gpu_dbg_intr, "corrected ecc pde0 data error");
+	if (ecc_status & fb_mmu_fillunit_ecc_status_uncorrected_err_pde0_data_m())
+		nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc pde0 data error");
+
+	if (corrected_overflow || uncorrected_overflow)
+		nvgpu_info(g, "mmu fillunit ecc counter overflow!");
+
+	nvgpu_log(g, gpu_dbg_intr,
+		"ecc error address: 0x%x", ecc_addr);
+	nvgpu_log(g, gpu_dbg_intr,
+		"ecc error count corrected: %d, uncorrected %d",
+		g->ecc.eng.t19x.mmu_fillunit_corrected_err_count.counters[0],
+		g->ecc.eng.t19x.mmu_fillunit_uncorrected_err_count.counters[0]);
+}
+
 static void gv11b_fb_hub_isr(struct gk20a *g)
 {
 	u32 status;
@@ -246,28 +425,16 @@ static void gv11b_fb_hub_isr(struct gk20a *g)
 				HUB_INTR_TYPE_ECC_UNCORRECTED);
 
 		status = gk20a_readl(g, fb_mmu_l2tlb_ecc_status_r());
-		if (status) {
-			nvgpu_info(g, "hub mmu L2 ecc status: 0x%x",
-				status);
-			gk20a_writel(g, fb_mmu_l2tlb_ecc_status_r(),
-				fb_mmu_l2tlb_ecc_status_reset_clear_f());
-		}
+		if (status)
+			gv11b_handle_l2tlb_ecc_isr(g, status);
 
 		status = gk20a_readl(g, fb_mmu_hubtlb_ecc_status_r());
-		if (status) {
-			nvgpu_info(g, "hub mmu hub tlb ecc status: 0x%x",
-				status);
-			gk20a_writel(g, fb_mmu_hubtlb_ecc_status_r(),
-				fb_mmu_hubtlb_ecc_status_reset_clear_f());
-		}
+		if (status)
+			gv11b_handle_hubtlb_ecc_isr(g, status);
 
 		status = gk20a_readl(g, fb_mmu_fillunit_ecc_status_r());
-		if (status) {
-			nvgpu_info(g, "hub mmu fill unit ecc status: 0x%x",
-				status);
-			gk20a_writel(g, fb_mmu_fillunit_ecc_status_r(),
-				fb_mmu_fillunit_ecc_status_reset_clear_f());
-		}
+		if (status)
+			gv11b_handle_fillunit_ecc_isr(g, status);
 
 		/* re-enable interrupts after handling */
 		gv11b_fb_enable_hub_intr(g, STALL_REG_INDEX,
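
All three handlers added above follow the same accumulation pattern: read the per-unit corrected/uncorrected count registers, take the TOTAL field as the delta since the last interrupt, clear the hardware counter and the status register, and widen the delta by one full counter period when the matching overflow bit is set in the status word. The standalone sketch below restates only that arithmetic; it is not driver code, and the 16-bit TOTAL field width is an assumption for illustration (the real width comes from the fb_mmu_*_ecc_*_err_count_total_s() helpers in the generated hardware headers).

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define ECC_CNT_TOTAL_SHIFT	0U
#define ECC_CNT_TOTAL_WIDTH	16U	/* assumed width of the TOTAL field */
#define ECC_CNT_TOTAL_MASK	((1U << ECC_CNT_TOTAL_WIDTH) - 1U)

/* Extract the TOTAL field, mirroring the *_err_count_total_v() helpers. */
static uint32_t ecc_count_total(uint32_t count_reg)
{
	return (count_reg >> ECC_CNT_TOTAL_SHIFT) & ECC_CNT_TOTAL_MASK;
}

/*
 * Fold one interrupt's worth of errors into a software counter.  When
 * the status register flags a counter overflow, the delta is widened by
 * one full counter period (1 << width), as in the handlers above.
 */
static void ecc_accumulate(uint32_t *sw_counter, uint32_t count_reg,
			   int overflowed)
{
	uint32_t delta = ecc_count_total(count_reg);

	if (overflowed)
		delta += 1U << ECC_CNT_TOTAL_WIDTH;

	*sw_counter += delta;
}

int main(void)
{
	uint32_t corrected = 0U;

	/* Normal case: the hardware latched 3 new corrected errors. */
	ecc_accumulate(&corrected, 3U, 0);

	/* Overflow case: the counter wrapped and then latched 5 more. */
	ecc_accumulate(&corrected, 5U, 1);

	printf("accumulated corrected errors: %" PRIu32 "\n", corrected);
	return 0;
}

Because the handlers write the hardware count registers back to zero after each read, the TOTAL field seen at the next interrupt is already a delta; the overflow correction only matters when the counter wraps before the interrupt can be serviced.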
diff --git a/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c
index c69e1478..95bbfbd3 100644
--- a/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c
+++ b/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c
@@ -182,6 +182,13 @@ static struct device_attribute *dev_attr_gpccs_ecc_uncorrected_err_count_array;
 static struct device_attribute *dev_attr_l2_cache_ecc_corrected_err_count_array;
 static struct device_attribute *dev_attr_l2_cache_ecc_uncorrected_err_count_array;
 
+static struct device_attribute *dev_attr_mmu_l2tlb_ecc_corrected_err_count_array;
+static struct device_attribute *dev_attr_mmu_l2tlb_ecc_uncorrected_err_count_array;
+static struct device_attribute *dev_attr_mmu_hubtlb_ecc_corrected_err_count_array;
+static struct device_attribute *dev_attr_mmu_hubtlb_ecc_uncorrected_err_count_array;
+static struct device_attribute *dev_attr_mmu_fillunit_ecc_corrected_err_count_array;
+static struct device_attribute *dev_attr_mmu_fillunit_ecc_uncorrected_err_count_array;
+
 void gr_gv11b_create_sysfs(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
@@ -310,6 +317,49 @@ void gr_gv11b_create_sysfs(struct device *dev)
310 "mmu_l1tlb_ecc_corrected_err_count", 317 "mmu_l1tlb_ecc_corrected_err_count",
311 &g->ecc.gr.t19x.mmu_l1tlb_corrected_err_count, 318 &g->ecc.gr.t19x.mmu_l1tlb_corrected_err_count,
312 dev_attr_mmu_l1tlb_ecc_corrected_err_count_array); 319 dev_attr_mmu_l1tlb_ecc_corrected_err_count_array);
320
321 error |= gp10b_ecc_stat_create(dev,
322 1,
323 "eng",
324 "mmu_l2tlb_ecc_uncorrected_err_count",
325 &g->ecc.eng.t19x.mmu_l2tlb_uncorrected_err_count,
326 dev_attr_mmu_l2tlb_ecc_uncorrected_err_count_array);
327
328 error |= gp10b_ecc_stat_create(dev,
329 1,
330 "eng",
331 "mmu_l2tlb_ecc_corrected_err_count",
332 &g->ecc.eng.t19x.mmu_l2tlb_corrected_err_count,
333 dev_attr_mmu_l2tlb_ecc_corrected_err_count_array);
334
335 error |= gp10b_ecc_stat_create(dev,
336 1,
337 "eng",
338 "mmu_hubtlb_ecc_uncorrected_err_count",
339 &g->ecc.eng.t19x.mmu_hubtlb_uncorrected_err_count,
340 dev_attr_mmu_hubtlb_ecc_uncorrected_err_count_array);
341
342 error |= gp10b_ecc_stat_create(dev,
343 1,
344 "eng",
345 "mmu_hubtlb_ecc_corrected_err_count",
346 &g->ecc.eng.t19x.mmu_hubtlb_corrected_err_count,
347 dev_attr_mmu_hubtlb_ecc_corrected_err_count_array);
348
349 error |= gp10b_ecc_stat_create(dev,
350 1,
351 "eng",
352 "mmu_fillunit_ecc_uncorrected_err_count",
353 &g->ecc.eng.t19x.mmu_fillunit_uncorrected_err_count,
354 dev_attr_mmu_fillunit_ecc_uncorrected_err_count_array);
355
356 error |= gp10b_ecc_stat_create(dev,
357 1,
358 "eng",
359 "mmu_fillunit_ecc_corrected_err_count",
360 &g->ecc.eng.t19x.mmu_fillunit_corrected_err_count,
361 dev_attr_mmu_fillunit_ecc_corrected_err_count_array);
362
313 if (error) 363 if (error)
314 dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); 364 dev_err(dev, "Failed to create gv11b sysfs attributes!\n");
315} 365}
@@ -407,4 +457,34 @@ static void gr_gv11b_remove_sysfs(struct device *dev)
407 g->gr.gpc_count, 457 g->gr.gpc_count,
408 &g->ecc.gr.t19x.mmu_l1tlb_corrected_err_count, 458 &g->ecc.gr.t19x.mmu_l1tlb_corrected_err_count,
409 dev_attr_mmu_l1tlb_ecc_corrected_err_count_array); 459 dev_attr_mmu_l1tlb_ecc_corrected_err_count_array);
460
461 gp10b_ecc_stat_remove(dev,
462 1,
463 &g->ecc.eng.t19x.mmu_l2tlb_uncorrected_err_count,
464 dev_attr_mmu_l2tlb_ecc_uncorrected_err_count_array);
465
466 gp10b_ecc_stat_remove(dev,
467 1,
468 &g->ecc.eng.t19x.mmu_l2tlb_corrected_err_count,
469 dev_attr_mmu_l2tlb_ecc_corrected_err_count_array);
470
471 gp10b_ecc_stat_remove(dev,
472 1,
473 &g->ecc.eng.t19x.mmu_hubtlb_uncorrected_err_count,
474 dev_attr_mmu_hubtlb_ecc_uncorrected_err_count_array);
475
476 gp10b_ecc_stat_remove(dev,
477 1,
478 &g->ecc.eng.t19x.mmu_hubtlb_corrected_err_count,
479 dev_attr_mmu_hubtlb_ecc_corrected_err_count_array);
480
481 gp10b_ecc_stat_remove(dev,
482 1,
483 &g->ecc.eng.t19x.mmu_fillunit_uncorrected_err_count,
484 dev_attr_mmu_fillunit_ecc_uncorrected_err_count_array);
485
486 gp10b_ecc_stat_remove(dev,
487 1,
488 &g->ecc.eng.t19x.mmu_fillunit_corrected_err_count,
489 dev_attr_mmu_fillunit_ecc_corrected_err_count_array);
410} 490}
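
The gp10b_ecc_stat_create()/gp10b_ecc_stat_remove() pairs above publish each new counter as a read-only sysfs node under the GPU device and tear it down symmetrically on removal. As a rough consumer-side illustration, a userspace poller could read one of them as below; the path is a placeholder assumption, since the actual node name and location are decided by gp10b_ecc_stat_create() and the platform sysfs layout, neither of which is part of this diff.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/*
	 * Placeholder path: the real node name/location is chosen by
	 * gp10b_ecc_stat_create() and the platform's sysfs layout.
	 */
	const char *node =
		"/sys/devices/gpu.0/ecc/eng_mmu_l2tlb_ecc_corrected_err_count";
	FILE *f = fopen(node, "r");
	unsigned long count;

	if (f == NULL) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	if (fscanf(f, "%lu", &count) == 1)
		printf("corrected L2 TLB ECC errors: %lu\n", count);
	fclose(f);
	return EXIT_SUCCESS;
}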