summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/nvgpu/gv11b/gr_gv11b.c 82
-rw-r--r-- drivers/gpu/nvgpu/gv11b/gr_gv11b.h 2
-rw-r--r-- drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c 24
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h 64
4 files changed, 171 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 0c0b4261..014ba537 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -556,6 +556,84 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
556 return ret; 556 return ret;
557} 557}
558 558
559static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
560 bool *post_event, struct channel_gk20a *fault_ch,
561 u32 *hww_global_esr)
562{
563 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
564 u32 offset = gpc_stride * gpc;
565 u32 gcc_l15_ecc_status, gcc_l15_ecc_corrected_err_status = 0;
566 u32 gcc_l15_ecc_uncorrected_err_status = 0;
567 u32 gcc_l15_corrected_err_count_delta = 0;
568 u32 gcc_l15_uncorrected_err_count_delta = 0;
569 bool is_gcc_l15_ecc_corrected_total_err_overflow = 0;
570 bool is_gcc_l15_ecc_uncorrected_total_err_overflow = 0;
571
572 /* Check for gcc l15 ECC errors. */
573 gcc_l15_ecc_status = gk20a_readl(g,
574 gr_pri_gpc0_gcc_l15_ecc_status_r() + offset);
575 gcc_l15_ecc_corrected_err_status = gcc_l15_ecc_status &
576 (gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank0_m() |
577 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank1_m());
578 gcc_l15_ecc_uncorrected_err_status = gcc_l15_ecc_status &
579 (gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank0_m() |
580 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank1_m());
581
582 if ((gcc_l15_ecc_corrected_err_status == 0) && (gcc_l15_ecc_uncorrected_err_status == 0))
583 return 0;
584
585 gcc_l15_corrected_err_count_delta =
586 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_v(
587 gk20a_readl(g,
588 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() +
589 offset));
590 gcc_l15_uncorrected_err_count_delta =
591 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_v(
592 gk20a_readl(g,
593 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() +
594 offset));
595 is_gcc_l15_ecc_corrected_total_err_overflow =
596 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_total_counter_overflow_v(gcc_l15_ecc_status);
597 is_gcc_l15_ecc_uncorrected_total_err_overflow =
598 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_total_counter_overflow_v(gcc_l15_ecc_status);
599
600 if ((gcc_l15_corrected_err_count_delta > 0) || is_gcc_l15_ecc_corrected_total_err_overflow) {
601 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
602 "corrected error (SBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]",
603 gcc_l15_ecc_corrected_err_status, is_gcc_l15_ecc_corrected_total_err_overflow);
604
605 /* HW uses 16-bits counter */
606 gcc_l15_corrected_err_count_delta +=
607 (is_gcc_l15_ecc_corrected_total_err_overflow <<
608 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s());
609 g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count.counters[gpc] +=
610 gcc_l15_corrected_err_count_delta;
611 gk20a_writel(g,
612 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset,
613 0);
614 }
615 if ((gcc_l15_uncorrected_err_count_delta > 0) || is_gcc_l15_ecc_uncorrected_total_err_overflow) {
616 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
617 "Uncorrected error (DBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]",
618 gcc_l15_ecc_uncorrected_err_status, is_gcc_l15_ecc_uncorrected_total_err_overflow);
619
620 /* HW uses 16-bits counter */
621 gcc_l15_uncorrected_err_count_delta +=
622 (is_gcc_l15_ecc_uncorrected_total_err_overflow <<
623 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s());
624 g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count.counters[gpc] +=
625 gcc_l15_uncorrected_err_count_delta;
626 gk20a_writel(g,
627 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset,
628 0);
629 }
630
631 gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_status_r() + offset,
632 gr_pri_gpc0_gcc_l15_ecc_status_reset_task_f());
633
634 return 0;
635}
636
559static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) 637static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g)
560{ 638{
561 struct gr_gk20a *gr = &g->gr; 639 struct gr_gk20a *gr = &g->gr;
@@ -567,7 +645,8 @@ static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g)
567 tpc_mask = 645 tpc_mask =
568 gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); 646 gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1);
569 647
570 gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), tpc_mask); 648 gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(),
649 (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1)));
571} 650}
572 651
573static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, 652static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
@@ -2113,6 +2192,7 @@ void gv11b_init_gr(struct gpu_ops *gops)
2113 gops->gr.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask; 2192 gops->gr.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask;
2114 gops->gr.get_access_map = gr_gv11b_get_access_map; 2193 gops->gr.get_access_map = gr_gv11b_get_access_map;
2115 gops->gr.handle_sm_exception = gr_gv11b_handle_sm_exception; 2194 gops->gr.handle_sm_exception = gr_gv11b_handle_sm_exception;
2195 gops->gr.handle_gcc_exception = gr_gv11b_handle_gcc_exception;
2116 gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception; 2196 gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception;
2117 gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions; 2197 gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions;
2118 gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr; 2198 gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr;
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index 5bcbe667..cf3842b6 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -45,6 +45,8 @@ struct gr_t19x {
45 struct gr_gp10b_ecc_stat sm_l1_data_uncorrected_err_count; 45 struct gr_gp10b_ecc_stat sm_l1_data_uncorrected_err_count;
46 struct gr_gp10b_ecc_stat sm_icache_corrected_err_count; 46 struct gr_gp10b_ecc_stat sm_icache_corrected_err_count;
47 struct gr_gp10b_ecc_stat sm_icache_uncorrected_err_count; 47 struct gr_gp10b_ecc_stat sm_icache_uncorrected_err_count;
48 struct gr_gp10b_ecc_stat gcc_l15_corrected_err_count;
49 struct gr_gp10b_ecc_stat gcc_l15_uncorrected_err_count;
48 } ecc_stats; 50 } ecc_stats;
49}; 51};
50 52
diff --git a/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c
index 009e5716..39ae68eb 100644
--- a/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c
+++ b/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c
@@ -131,6 +131,8 @@ static struct device_attribute *dev_attr_sm_l1_data_ecc_corrected_err_count_arra
131static struct device_attribute *dev_attr_sm_l1_data_ecc_uncorrected_err_count_array; 131static struct device_attribute *dev_attr_sm_l1_data_ecc_uncorrected_err_count_array;
132static struct device_attribute *dev_attr_sm_icache_ecc_corrected_err_count_array; 132static struct device_attribute *dev_attr_sm_icache_ecc_corrected_err_count_array;
133static struct device_attribute *dev_attr_sm_icache_ecc_uncorrected_err_count_array; 133static struct device_attribute *dev_attr_sm_icache_ecc_uncorrected_err_count_array;
134static struct device_attribute *dev_attr_gcc_l15_ecc_corrected_err_count_array;
135static struct device_attribute *dev_attr_gcc_l15_ecc_uncorrected_err_count_array;
134 136
135void gr_gv11b_create_sysfs(struct device *dev) 137void gr_gv11b_create_sysfs(struct device *dev)
136{ 138{
@@ -193,6 +195,18 @@ void gr_gv11b_create_sysfs(struct device *dev)
193 &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count, 195 &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count,
194 dev_attr_sm_icache_ecc_uncorrected_err_count_array); 196 dev_attr_sm_icache_ecc_uncorrected_err_count_array);
195 197
198 error |= gr_gp10b_ecc_stat_create(dev,
199 0,
200 "gcc_l15_ecc_corrected_err_count",
201 &g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count,
202 dev_attr_gcc_l15_ecc_corrected_err_count_array);
203
204 error |= gr_gp10b_ecc_stat_create(dev,
205 0,
206 "gcc_l15_ecc_uncorrected_err_count",
207 &g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count,
208 dev_attr_gcc_l15_ecc_uncorrected_err_count_array);
209
196 if (error) 210 if (error)
197 dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); 211 dev_err(dev, "Failed to create gv11b sysfs attributes!\n");
198} 212}
@@ -241,4 +255,14 @@ static void gr_gv11b_remove_sysfs(struct device *dev)
241 &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count, 255 &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count,
242 dev_attr_sm_icache_ecc_uncorrected_err_count_array); 256 dev_attr_sm_icache_ecc_uncorrected_err_count_array);
243 257
258 gr_gp10b_ecc_stat_remove(dev,
259 0,
260 &g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count,
261 dev_attr_gcc_l15_ecc_corrected_err_count_array);
262
263 gr_gp10b_ecc_stat_remove(dev,
264 0,
265 &g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count,
266 dev_attr_gcc_l15_ecc_uncorrected_err_count_array);
267
244} 268}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
index 4ce69743..6f38cf5b 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
@@ -3370,6 +3370,10 @@ static inline u32 gr_gpcs_gpccs_gpc_exception_en_r(void)
3370{ 3370{
3371 return 0x0041ac94; 3371 return 0x0041ac94;
3372} 3372}
3373static inline u32 gr_gpcs_gpccs_gpc_exception_en_gcc_f(u32 v)
3374{
3375 return (v & 0x1) << 2;
3376}
3373static inline u32 gr_gpcs_gpccs_gpc_exception_en_tpc_f(u32 v) 3377static inline u32 gr_gpcs_gpccs_gpc_exception_en_tpc_f(u32 v)
3374{ 3378{
3375 return (v & 0xff) << 16; 3379 return (v & 0xff) << 16;
@@ -3378,6 +3382,10 @@ static inline u32 gr_gpc0_gpccs_gpc_exception_r(void)
3378{ 3382{
3379 return 0x00502c90; 3383 return 0x00502c90;
3380} 3384}
3385static inline u32 gr_gpc0_gpccs_gpc_exception_gcc_v(u32 r)
3386{
3387 return (r >> 2) & 0x1;
3388}
3381static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_v(u32 r) 3389static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_v(u32 r)
3382{ 3390{
3383 return (r >> 16) & 0xff; 3391 return (r >> 16) & 0xff;
@@ -3386,6 +3394,62 @@ static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_0_pending_v(void)
3386{ 3394{
3387 return 0x00000001; 3395 return 0x00000001;
3388} 3396}
3397static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_r(void)
3398{
3399 return 0x00501048;
3400}
3401static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank0_m(void)
3402{
3403 return 0x1 << 0;
3404}
3405static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank1_m(void)
3406{
3407 return 0x1 << 1;
3408}
3409static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank0_m(void)
3410{
3411 return 0x1 << 4;
3412}
3413static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank1_m(void)
3414{
3415 return 0x1 << 5;
3416}
3417static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_total_counter_overflow_v(u32 r)
3418{
3419 return (r >> 8) & 0x1;
3420}
3421static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_total_counter_overflow_v(u32 r)
3422{
3423 return (r >> 10) & 0x1;
3424}
3425static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_reset_task_f(void)
3426{
3427 return 0x40000000;
3428}
3429static inline u32 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r(void)
3430{
3431 return 0x0050104c;
3432}
3433static inline u32 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s(void)
3434{
3435 return 16;
3436}
3437static inline u32 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_v(u32 r)
3438{
3439 return (r >> 0) & 0xffff;
3440}
3441static inline u32 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r(void)
3442{
3443 return 0x00501054;
3444}
3445static inline u32 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s(void)
3446{
3447 return 16;
3448}
3449static inline u32 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_v(u32 r)
3450{
3451 return (r >> 0) & 0xffff;
3452}
3389static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_r(void) 3453static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_r(void)
3390{ 3454{
3391 return 0x00504508; 3455 return 0x00504508;