diff options
author | Lakshmanan M <lm@nvidia.com> | 2017-05-19 06:10:41 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-05-19 12:44:25 -0400 |
commit | 45ca7cb8c5774cfc15015973b1883faa1d93b9e6 (patch) | |
tree | 0be9fbe523935b1f07be019f28b72f991fbf216e /drivers/gpu/nvgpu/gv11b | |
parent | 5a08eafbe076fba98de62883636ee6b0751cf7e9 (diff) |
gpu: nvgpu: gv11b: Add GCC L1.5 parity support
Add handling of the GCC L1.5 parity exception.
JIRA GPUT19X-86
Change-Id: Ie83fc306d3dff79b0ddaf2616dcf0ff71fccd4ca
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1485834
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 82 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c | 24 |
3 files changed, 107 insertions, 1 deletion
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 0c0b4261..014ba537 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -556,6 +556,84 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
556 | return ret; | 556 | return ret; |
557 | } | 557 | } |
558 | 558 | ||
559 | static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, | ||
560 | bool *post_event, struct channel_gk20a *fault_ch, | ||
561 | u32 *hww_global_esr) | ||
562 | { | ||
563 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
564 | u32 offset = gpc_stride * gpc; | ||
565 | u32 gcc_l15_ecc_status, gcc_l15_ecc_corrected_err_status = 0; | ||
566 | u32 gcc_l15_ecc_uncorrected_err_status = 0; | ||
567 | u32 gcc_l15_corrected_err_count_delta = 0; | ||
568 | u32 gcc_l15_uncorrected_err_count_delta = 0; | ||
569 | bool is_gcc_l15_ecc_corrected_total_err_overflow = 0; | ||
570 | bool is_gcc_l15_ecc_uncorrected_total_err_overflow = 0; | ||
571 | |||
572 | /* Check for gcc l15 ECC errors. */ | ||
573 | gcc_l15_ecc_status = gk20a_readl(g, | ||
574 | gr_pri_gpc0_gcc_l15_ecc_status_r() + offset); | ||
575 | gcc_l15_ecc_corrected_err_status = gcc_l15_ecc_status & | ||
576 | (gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank0_m() | | ||
577 | gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank1_m()); | ||
578 | gcc_l15_ecc_uncorrected_err_status = gcc_l15_ecc_status & | ||
579 | (gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank0_m() | | ||
580 | gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank1_m()); | ||
581 | |||
582 | if ((gcc_l15_ecc_corrected_err_status == 0) && (gcc_l15_ecc_uncorrected_err_status == 0)) | ||
583 | return 0; | ||
584 | |||
585 | gcc_l15_corrected_err_count_delta = | ||
586 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_v( | ||
587 | gk20a_readl(g, | ||
588 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + | ||
589 | offset)); | ||
590 | gcc_l15_uncorrected_err_count_delta = | ||
591 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_v( | ||
592 | gk20a_readl(g, | ||
593 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + | ||
594 | offset)); | ||
595 | is_gcc_l15_ecc_corrected_total_err_overflow = | ||
596 | gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_total_counter_overflow_v(gcc_l15_ecc_status); | ||
597 | is_gcc_l15_ecc_uncorrected_total_err_overflow = | ||
598 | gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_total_counter_overflow_v(gcc_l15_ecc_status); | ||
599 | |||
600 | if ((gcc_l15_corrected_err_count_delta > 0) || is_gcc_l15_ecc_corrected_total_err_overflow) { | ||
601 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, | ||
602 | "corrected error (SBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]", | ||
603 | gcc_l15_ecc_corrected_err_status, is_gcc_l15_ecc_corrected_total_err_overflow); | ||
604 | |||
605 | /* HW uses 16-bits counter */ | ||
606 | gcc_l15_corrected_err_count_delta += | ||
607 | (is_gcc_l15_ecc_corrected_total_err_overflow << | ||
608 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s()); | ||
609 | g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count.counters[gpc] += | ||
610 | gcc_l15_corrected_err_count_delta; | ||
611 | gk20a_writel(g, | ||
612 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset, | ||
613 | 0); | ||
614 | } | ||
615 | if ((gcc_l15_uncorrected_err_count_delta > 0) || is_gcc_l15_ecc_uncorrected_total_err_overflow) { | ||
616 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, | ||
617 | "Uncorrected error (DBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]", | ||
618 | gcc_l15_ecc_uncorrected_err_status, is_gcc_l15_ecc_uncorrected_total_err_overflow); | ||
619 | |||
620 | /* HW uses 16-bits counter */ | ||
621 | gcc_l15_uncorrected_err_count_delta += | ||
622 | (is_gcc_l15_ecc_uncorrected_total_err_overflow << | ||
623 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s()); | ||
624 | g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count.counters[gpc] += | ||
625 | gcc_l15_uncorrected_err_count_delta; | ||
626 | gk20a_writel(g, | ||
627 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset, | ||
628 | 0); | ||
629 | } | ||
630 | |||
631 | gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_status_r() + offset, | ||
632 | gr_pri_gpc0_gcc_l15_ecc_status_reset_task_f()); | ||
633 | |||
634 | return 0; | ||
635 | } | ||
636 | |||
559 | static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) | 637 | static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) |
560 | { | 638 | { |
561 | struct gr_gk20a *gr = &g->gr; | 639 | struct gr_gk20a *gr = &g->gr; |
@@ -567,7 +645,8 @@ static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) | |||
567 | tpc_mask = | 645 | tpc_mask = |
568 | gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); | 646 | gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); |
569 | 647 | ||
570 | gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), tpc_mask); | 648 | gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), |
649 | (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1))); | ||
571 | } | 650 | } |
572 | 651 | ||
573 | static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, | 652 | static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, |
@@ -2113,6 +2192,7 @@ void gv11b_init_gr(struct gpu_ops *gops) | |||
2113 | gops->gr.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask; | 2192 | gops->gr.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask; |
2114 | gops->gr.get_access_map = gr_gv11b_get_access_map; | 2193 | gops->gr.get_access_map = gr_gv11b_get_access_map; |
2115 | gops->gr.handle_sm_exception = gr_gv11b_handle_sm_exception; | 2194 | gops->gr.handle_sm_exception = gr_gv11b_handle_sm_exception; |
2195 | gops->gr.handle_gcc_exception = gr_gv11b_handle_gcc_exception; | ||
2116 | gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception; | 2196 | gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception; |
2117 | gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions; | 2197 | gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions; |
2118 | gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr; | 2198 | gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr; |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index 5bcbe667..cf3842b6 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h | |||
@@ -45,6 +45,8 @@ struct gr_t19x { | |||
45 | struct gr_gp10b_ecc_stat sm_l1_data_uncorrected_err_count; | 45 | struct gr_gp10b_ecc_stat sm_l1_data_uncorrected_err_count; |
46 | struct gr_gp10b_ecc_stat sm_icache_corrected_err_count; | 46 | struct gr_gp10b_ecc_stat sm_icache_corrected_err_count; |
47 | struct gr_gp10b_ecc_stat sm_icache_uncorrected_err_count; | 47 | struct gr_gp10b_ecc_stat sm_icache_uncorrected_err_count; |
48 | struct gr_gp10b_ecc_stat gcc_l15_corrected_err_count; | ||
49 | struct gr_gp10b_ecc_stat gcc_l15_uncorrected_err_count; | ||
48 | } ecc_stats; | 50 | } ecc_stats; |
49 | }; | 51 | }; |
50 | 52 | ||
diff --git a/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c index 009e5716..39ae68eb 100644 --- a/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c +++ b/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c | |||
@@ -131,6 +131,8 @@ static struct device_attribute *dev_attr_sm_l1_data_ecc_corrected_err_count_arra | |||
131 | static struct device_attribute *dev_attr_sm_l1_data_ecc_uncorrected_err_count_array; | 131 | static struct device_attribute *dev_attr_sm_l1_data_ecc_uncorrected_err_count_array; |
132 | static struct device_attribute *dev_attr_sm_icache_ecc_corrected_err_count_array; | 132 | static struct device_attribute *dev_attr_sm_icache_ecc_corrected_err_count_array; |
133 | static struct device_attribute *dev_attr_sm_icache_ecc_uncorrected_err_count_array; | 133 | static struct device_attribute *dev_attr_sm_icache_ecc_uncorrected_err_count_array; |
134 | static struct device_attribute *dev_attr_gcc_l15_ecc_corrected_err_count_array; | ||
135 | static struct device_attribute *dev_attr_gcc_l15_ecc_uncorrected_err_count_array; | ||
134 | 136 | ||
135 | void gr_gv11b_create_sysfs(struct device *dev) | 137 | void gr_gv11b_create_sysfs(struct device *dev) |
136 | { | 138 | { |
@@ -193,6 +195,18 @@ void gr_gv11b_create_sysfs(struct device *dev) | |||
193 | &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count, | 195 | &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count, |
194 | dev_attr_sm_icache_ecc_uncorrected_err_count_array); | 196 | dev_attr_sm_icache_ecc_uncorrected_err_count_array); |
195 | 197 | ||
198 | error |= gr_gp10b_ecc_stat_create(dev, | ||
199 | 0, | ||
200 | "gcc_l15_ecc_corrected_err_count", | ||
201 | &g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count, | ||
202 | dev_attr_gcc_l15_ecc_corrected_err_count_array); | ||
203 | |||
204 | error |= gr_gp10b_ecc_stat_create(dev, | ||
205 | 0, | ||
206 | "gcc_l15_ecc_uncorrected_err_count", | ||
207 | &g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count, | ||
208 | dev_attr_gcc_l15_ecc_uncorrected_err_count_array); | ||
209 | |||
196 | if (error) | 210 | if (error) |
197 | dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); | 211 | dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); |
198 | } | 212 | } |
@@ -241,4 +255,14 @@ static void gr_gv11b_remove_sysfs(struct device *dev) | |||
241 | &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count, | 255 | &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count, |
242 | dev_attr_sm_icache_ecc_uncorrected_err_count_array); | 256 | dev_attr_sm_icache_ecc_uncorrected_err_count_array); |
243 | 257 | ||
258 | gr_gp10b_ecc_stat_remove(dev, | ||
259 | 0, | ||
260 | &g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count, | ||
261 | dev_attr_gcc_l15_ecc_corrected_err_count_array); | ||
262 | |||
263 | gr_gp10b_ecc_stat_remove(dev, | ||
264 | 0, | ||
265 | &g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count, | ||
266 | dev_attr_gcc_l15_ecc_uncorrected_err_count_array); | ||
267 | |||
244 | } | 268 | } |