summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/nvgpu/gv11b/ecc_gv11b.h 5
-rw-r--r-- drivers/gpu/nvgpu/gv11b/ltc_gv11b.c 107
-rw-r--r-- drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c 27
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_ltc_gv11b.h 184
4 files changed, 323 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
index 6b471655..4e1696f7 100644
--- a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
@@ -33,4 +33,9 @@ struct ecc_gr_t19x {
33 struct gk20a_ecc_stat gpccs_uncorrected_err_count; 33 struct gk20a_ecc_stat gpccs_uncorrected_err_count;
34}; 34};
35 35
36struct ecc_ltc_t19x {
37 struct gk20a_ecc_stat l2_cache_corrected_err_count;
38 struct gk20a_ecc_stat l2_cache_uncorrected_err_count;
39};
40
36#endif 41#endif
diff --git a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c
index 23beca5d..b8a97ce3 100644
--- a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c
@@ -20,6 +20,7 @@
20#include "ltc_gv11b.h" 20#include "ltc_gv11b.h"
21 21
22#include <nvgpu/hw/gv11b/hw_ltc_gv11b.h> 22#include <nvgpu/hw/gv11b/hw_ltc_gv11b.h>
23#include <nvgpu/hw/gv11b/hw_mc_gv11b.h>
23#include <nvgpu/hw/gv11b/hw_top_gv11b.h> 24#include <nvgpu/hw/gv11b/hw_top_gv11b.h>
24#include <nvgpu/hw/gv11b/hw_pri_ringmaster_gv11b.h> 25#include <nvgpu/hw/gv11b/hw_pri_ringmaster_gv11b.h>
25 26
@@ -74,6 +75,111 @@ static void gv11b_ltc_init_fs_state(struct gk20a *g)
74 ltc_intr); 75 ltc_intr);
75} 76}
76 77
78static void gv11b_ltc_isr(struct gk20a *g)
79{
80 u32 mc_intr, ltc_intr3;
81 unsigned int ltc, slice;
82 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
83 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
84 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
85 u32 corrected_delta, uncorrected_delta;
86 u32 corrected_overflow, uncorrected_overflow;
87 u32 ltc_corrected, ltc_uncorrected;
88
89 mc_intr = gk20a_readl(g, mc_intr_ltc_r());
90 for (ltc = 0; ltc < g->ltc_count; ltc++) {
91 if ((mc_intr & 1 << ltc) == 0)
92 continue;
93 ltc_corrected = ltc_uncorrected = 0;
94
95 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
96 u32 offset = ltc_stride * ltc + lts_stride * slice;
97 ltc_intr3 = gk20a_readl(g, ltc_ltc0_lts0_intr3_r() +
98 offset);
99
100 /* Detect and handle ECC PARITY errors */
101
102 if (ltc_intr3 &
103 (ltc_ltcs_ltss_intr3_ecc_uncorrected_m() |
104 ltc_ltcs_ltss_intr3_ecc_corrected_m())) {
105
106 ecc_status = gk20a_readl(g,
107 ltc_ltc0_lts0_l2_cache_ecc_status_r() +
108 offset);
109 ecc_addr = gk20a_readl(g,
110 ltc_ltc0_lts0_l2_cache_ecc_address_r() +
111 offset);
112 corrected_cnt = gk20a_readl(g,
113 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset);
114 uncorrected_cnt = gk20a_readl(g,
115 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset);
116
117 corrected_delta =
118 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(corrected_cnt);
119 uncorrected_delta =
120 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(uncorrected_cnt);
121 corrected_overflow = ecc_status &
122 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m();
123
124 uncorrected_overflow = ecc_status &
125 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m();
126
127 /* clear the interrupt */
128 if ((corrected_delta > 0) || corrected_overflow) {
129 gk20a_writel(g, ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset, 0);
130 }
131 if ((uncorrected_delta > 0) || uncorrected_overflow) {
132 gk20a_writel(g,
133 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset, 0);
134 }
135
136 gk20a_writel(g, ltc_ltc0_lts0_l2_cache_ecc_status_r() + offset,
137 ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f());
138
139 /* update counters per slice */
140 if (corrected_overflow)
141 corrected_delta += (0x1UL << ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s());
142 if (uncorrected_overflow)
143 uncorrected_delta += (0x1UL << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
144
145 ltc_corrected += corrected_delta;
146 ltc_uncorrected += uncorrected_delta;
147 nvgpu_log(g, gpu_dbg_intr,
148 "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
149
150 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m())
151 nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected");
152 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m())
153 nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected");
154 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m())
155 nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected");
156 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m())
157 nvgpu_log(g, gpu_dbg_intr, "tstg ecc error uncorrected");
158 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m())
159 nvgpu_log(g, gpu_dbg_intr, "dstg ecc error corrected");
160 if (ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m())
161 nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected");
162
163 if (corrected_overflow || uncorrected_overflow)
164 nvgpu_info(g, "ecc counter overflow!");
165
166 nvgpu_log(g, gpu_dbg_intr,
167 "ecc error address: 0x%x", ecc_addr);
168
169 }
170
171 }
172 g->ecc.ltc.t19x.l2_cache_corrected_err_count.counters[ltc] +=
173 ltc_corrected;
174 g->ecc.ltc.t19x.l2_cache_uncorrected_err_count.counters[ltc] +=
175 ltc_uncorrected;
176
177 }
178
179 /* fallback to other interrupts */
180 gp10b_ltc_isr(g);
181}
182
77static u32 gv11b_ltc_cbc_fix_config(struct gk20a *g, int base) 183static u32 gv11b_ltc_cbc_fix_config(struct gk20a *g, int base)
78{ 184{
79 u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r()); 185 u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());
@@ -93,4 +199,5 @@ void gv11b_init_ltc(struct gpu_ops *gops)
93 gops->ltc.set_zbc_s_entry = gv11b_ltc_set_zbc_stencil_entry; 199 gops->ltc.set_zbc_s_entry = gv11b_ltc_set_zbc_stencil_entry;
94 gops->ltc.init_fs_state = gv11b_ltc_init_fs_state; 200 gops->ltc.init_fs_state = gv11b_ltc_init_fs_state;
95 gops->ltc.cbc_fix_config = gv11b_ltc_cbc_fix_config; 201 gops->ltc.cbc_fix_config = gv11b_ltc_cbc_fix_config;
202 gops->ltc.isr = gv11b_ltc_isr;
96} 203}
diff --git a/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c
index 8733cae9..432af7c1 100644
--- a/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c
+++ b/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c
@@ -177,6 +177,9 @@ static struct device_attribute *dev_attr_fecs_ecc_uncorrected_err_count_array;
177static struct device_attribute *dev_attr_gpccs_ecc_corrected_err_count_array; 177static struct device_attribute *dev_attr_gpccs_ecc_corrected_err_count_array;
178static struct device_attribute *dev_attr_gpccs_ecc_uncorrected_err_count_array; 178static struct device_attribute *dev_attr_gpccs_ecc_uncorrected_err_count_array;
179 179
/*
 * Attribute arrays for the LTC L2 cache ECC counters; passed to
 * gp10b_ecc_stat_create() and gp10b_ecc_stat_remove() by the sysfs
 * create/remove routines in this file.
 */
static struct device_attribute *dev_attr_l2_cache_ecc_corrected_err_count_array;
static struct device_attribute *dev_attr_l2_cache_ecc_uncorrected_err_count_array;
182
180void gr_gv11b_create_sysfs(struct device *dev) 183void gr_gv11b_create_sysfs(struct device *dev)
181{ 184{
182 struct gk20a *g = get_gk20a(dev); 185 struct gk20a *g = get_gk20a(dev);
@@ -251,6 +254,20 @@ void gr_gv11b_create_sysfs(struct device *dev)
251 dev_attr_gcc_l15_ecc_uncorrected_err_count_array); 254 dev_attr_gcc_l15_ecc_uncorrected_err_count_array);
252 255
253 error |= gp10b_ecc_stat_create(dev, 256 error |= gp10b_ecc_stat_create(dev,
257 g->ltc_count,
258 "ltc",
259 "l2_cache_uncorrected_err_count",
260 &g->ecc.ltc.t19x.l2_cache_uncorrected_err_count,
261 dev_attr_l2_cache_ecc_uncorrected_err_count_array);
262
263 error |= gp10b_ecc_stat_create(dev,
264 g->ltc_count,
265 "ltc",
266 "l2_cache_corrected_err_count",
267 &g->ecc.ltc.t19x.l2_cache_corrected_err_count,
268 dev_attr_l2_cache_ecc_corrected_err_count_array);
269
270 error |= gp10b_ecc_stat_create(dev,
254 1, 271 1,
255 "gpc", 272 "gpc",
256 "fecs_ecc_uncorrected_err_count", 273 "fecs_ecc_uncorrected_err_count",
@@ -337,6 +354,16 @@ static void gr_gv11b_remove_sysfs(struct device *dev)
337 dev_attr_gcc_l15_ecc_uncorrected_err_count_array); 354 dev_attr_gcc_l15_ecc_uncorrected_err_count_array);
338 355
339 gp10b_ecc_stat_remove(dev, 356 gp10b_ecc_stat_remove(dev,
357 g->ltc_count,
358 &g->ecc.ltc.t19x.l2_cache_uncorrected_err_count,
359 dev_attr_l2_cache_ecc_uncorrected_err_count_array);
360
361 gp10b_ecc_stat_remove(dev,
362 g->ltc_count,
363 &g->ecc.ltc.t19x.l2_cache_corrected_err_count,
364 dev_attr_l2_cache_ecc_corrected_err_count_array);
365
366 gp10b_ecc_stat_remove(dev,
340 1, 367 1,
341 &g->ecc.gr.t19x.fecs_uncorrected_err_count, 368 &g->ecc.gr.t19x.fecs_uncorrected_err_count,
342 dev_attr_fecs_ecc_uncorrected_err_count_array); 369 dev_attr_fecs_ecc_uncorrected_err_count_array);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_ltc_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_ltc_gv11b.h
index 45d3df07..1bcd1246 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_ltc_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_ltc_gv11b.h
@@ -374,6 +374,190 @@ static inline u32 ltc_ltc0_lts0_intr_r(void)
374{ 374{
375 return 0x0014040c; 375 return 0x0014040c;
376} 376}
377static inline u32 ltc_ltcs_ltss_intr3_r(void)
378{
379 return 0x0017e388;
380}
381static inline u32 ltc_ltcs_ltss_intr3_ecc_corrected_m(void)
382{
383 return 0x1 << 7;
384}
385static inline u32 ltc_ltcs_ltss_intr3_ecc_uncorrected_m(void)
386{
387 return 0x1 << 8;
388}
389static inline u32 ltc_ltc0_lts0_intr3_r(void)
390{
391 return 0x00140588;
392}
393static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_r(void)
394{
395 return 0x001404f0;
396}
397static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_f(u32 v)
398{
399 return (v & 0x1) << 1;
400}
401static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m(void)
402{
403 return 0x1 << 1;
404}
405static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_f(u32 v)
406{
407 return (v & 0x1) << 3;
408}
409static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m(void)
410{
411 return 0x1 << 3;
412}
413static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_f(u32 v)
414{
415 return (v & 0x1) << 5;
416}
417static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m(void)
418{
419 return 0x1 << 5;
420}
421static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_f(u32 v)
422{
423 return (v & 0x1) << 0;
424}
425static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m(void)
426{
427 return 0x1 << 0;
428}
429static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_f(u32 v)
430{
431 return (v & 0x1) << 2;
432}
433static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m(void)
434{
435 return 0x1 << 2;
436}
437static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_f(u32 v)
438{
439 return (v & 0x1) << 4;
440}
441static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m(void)
442{
443 return 0x1 << 4;
444}
445static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_f(u32 v)
446{
447 return (v & 0x1) << 18;
448}
449static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_total_counter_overflow_m(void)
450{
451 return 0x1 << 18;
452}
453static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_f(u32 v)
454{
455 return (v & 0x1) << 16;
456}
457static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_total_counter_overflow_m(void)
458{
459 return 0x1 << 16;
460}
461static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_unique_counter_overflow_f(u32 v)
462{
463 return (v & 0x1) << 19;
464}
465static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_unique_counter_overflow_m(void)
466{
467 return 0x1 << 19;
468}
469static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_unique_counter_overflow_f(u32 v)
470{
471 return (v & 0x1) << 17;
472}
473static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_unique_counter_overflow_m(void)
474{
475 return 0x1 << 17;
476}
477static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_reset_f(u32 v)
478{
479 return (v & 0x1) << 30;
480}
481static inline u32 ltc_ltc0_lts0_l2_cache_ecc_status_reset_task_f(void)
482{
483 return 0x40000000;
484}
485static inline u32 ltc_ltc0_lts0_l2_cache_ecc_address_r(void)
486{
487 return 0x001404fc;
488}
489static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r(void)
490{
491 return 0x001404f4;
492}
493static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_s(void)
494{
495 return 16;
496}
497static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_f(u32 v)
498{
499 return (v & 0xffff) << 0;
500}
501static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_m(void)
502{
503 return 0xffff << 0;
504}
505static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_total_v(u32 r)
506{
507 return (r >> 0) & 0xffff;
508}
509static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_unique_total_s(void)
510{
511 return 16;
512}
513static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_unique_total_f(u32 v)
514{
515 return (v & 0xffff) << 16;
516}
517static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_unique_total_m(void)
518{
519 return 0xffff << 16;
520}
521static inline u32 ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_unique_total_v(u32 r)
522{
523 return (r >> 16) & 0xffff;
524}
525static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r(void)
526{
527 return 0x001404f8;
528}
529static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s(void)
530{
531 return 16;
532}
533static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_f(u32 v)
534{
535 return (v & 0xffff) << 0;
536}
537static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_m(void)
538{
539 return 0xffff << 0;
540}
541static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_v(u32 r)
542{
543 return (r >> 0) & 0xffff;
544}
545static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_unique_total_s(void)
546{
547 return 16;
548}
549static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_unique_total_f(u32 v)
550{
551 return (v & 0xffff) << 16;
552}
553static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_unique_total_m(void)
554{
555 return 0xffff << 16;
556}
557static inline u32 ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_unique_total_v(u32 r)
558{
559 return (r >> 16) & 0xffff;
560}
377static inline u32 ltc_ltc0_lts0_dstg_ecc_report_r(void) 561static inline u32 ltc_ltc0_lts0_dstg_ecc_report_r(void)
378{ 562{
379 return 0x0014051c; 563 return 0x0014051c;