diff options
author | Lakshmanan M <lm@nvidia.com> | 2017-05-19 06:10:41 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-05-19 12:44:25 -0400 |
commit | 45ca7cb8c5774cfc15015973b1883faa1d93b9e6 (patch) | |
tree | 0be9fbe523935b1f07be019f28b72f991fbf216e /drivers | |
parent | 5a08eafbe076fba98de62883636ee6b0751cf7e9 (diff) |
gpu: nvgpu: gv11b: Add GCC L1.5 parity support
Add handling of GCC L1.5 parity exception.
JIRA GPUT19X-86
Change-Id: Ie83fc306d3dff79b0ddaf2616dcf0ff71fccd4ca
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1485834
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 82 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c | 24 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h | 64 |
4 files changed, 171 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 0c0b4261..014ba537 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -556,6 +556,84 @@ static int gr_gv11b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
556 | return ret; | 556 | return ret; |
557 | } | 557 | } |
558 | 558 | ||
559 | static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, | ||
560 | bool *post_event, struct channel_gk20a *fault_ch, | ||
561 | u32 *hww_global_esr) | ||
562 | { | ||
563 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
564 | u32 offset = gpc_stride * gpc; | ||
565 | u32 gcc_l15_ecc_status, gcc_l15_ecc_corrected_err_status = 0; | ||
566 | u32 gcc_l15_ecc_uncorrected_err_status = 0; | ||
567 | u32 gcc_l15_corrected_err_count_delta = 0; | ||
568 | u32 gcc_l15_uncorrected_err_count_delta = 0; | ||
569 | bool is_gcc_l15_ecc_corrected_total_err_overflow = 0; | ||
570 | bool is_gcc_l15_ecc_uncorrected_total_err_overflow = 0; | ||
571 | |||
572 | /* Check for gcc l15 ECC errors. */ | ||
573 | gcc_l15_ecc_status = gk20a_readl(g, | ||
574 | gr_pri_gpc0_gcc_l15_ecc_status_r() + offset); | ||
575 | gcc_l15_ecc_corrected_err_status = gcc_l15_ecc_status & | ||
576 | (gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank0_m() | | ||
577 | gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank1_m()); | ||
578 | gcc_l15_ecc_uncorrected_err_status = gcc_l15_ecc_status & | ||
579 | (gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank0_m() | | ||
580 | gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank1_m()); | ||
581 | |||
582 | if ((gcc_l15_ecc_corrected_err_status == 0) && (gcc_l15_ecc_uncorrected_err_status == 0)) | ||
583 | return 0; | ||
584 | |||
585 | gcc_l15_corrected_err_count_delta = | ||
586 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_v( | ||
587 | gk20a_readl(g, | ||
588 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + | ||
589 | offset)); | ||
590 | gcc_l15_uncorrected_err_count_delta = | ||
591 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_v( | ||
592 | gk20a_readl(g, | ||
593 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + | ||
594 | offset)); | ||
595 | is_gcc_l15_ecc_corrected_total_err_overflow = | ||
596 | gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_total_counter_overflow_v(gcc_l15_ecc_status); | ||
597 | is_gcc_l15_ecc_uncorrected_total_err_overflow = | ||
598 | gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_total_counter_overflow_v(gcc_l15_ecc_status); | ||
599 | |||
600 | if ((gcc_l15_corrected_err_count_delta > 0) || is_gcc_l15_ecc_corrected_total_err_overflow) { | ||
601 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, | ||
602 | "corrected error (SBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]", | ||
603 | gcc_l15_ecc_corrected_err_status, is_gcc_l15_ecc_corrected_total_err_overflow); | ||
604 | |||
605 | /* HW uses 16-bits counter */ | ||
606 | gcc_l15_corrected_err_count_delta += | ||
607 | (is_gcc_l15_ecc_corrected_total_err_overflow << | ||
608 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s()); | ||
609 | g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count.counters[gpc] += | ||
610 | gcc_l15_corrected_err_count_delta; | ||
611 | gk20a_writel(g, | ||
612 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset, | ||
613 | 0); | ||
614 | } | ||
615 | if ((gcc_l15_uncorrected_err_count_delta > 0) || is_gcc_l15_ecc_uncorrected_total_err_overflow) { | ||
616 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, | ||
617 | "Uncorrected error (DBE) detected in GCC L1.5! err_mask [%08x] is_overf [%d]", | ||
618 | gcc_l15_ecc_uncorrected_err_status, is_gcc_l15_ecc_uncorrected_total_err_overflow); | ||
619 | |||
620 | /* HW uses 16-bits counter */ | ||
621 | gcc_l15_uncorrected_err_count_delta += | ||
622 | (is_gcc_l15_ecc_uncorrected_total_err_overflow << | ||
623 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s()); | ||
624 | g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count.counters[gpc] += | ||
625 | gcc_l15_uncorrected_err_count_delta; | ||
626 | gk20a_writel(g, | ||
627 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset, | ||
628 | 0); | ||
629 | } | ||
630 | |||
631 | gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_status_r() + offset, | ||
632 | gr_pri_gpc0_gcc_l15_ecc_status_reset_task_f()); | ||
633 | |||
634 | return 0; | ||
635 | } | ||
636 | |||
559 | static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) | 637 | static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) |
560 | { | 638 | { |
561 | struct gr_gk20a *gr = &g->gr; | 639 | struct gr_gk20a *gr = &g->gr; |
@@ -567,7 +645,8 @@ static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) | |||
567 | tpc_mask = | 645 | tpc_mask = |
568 | gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); | 646 | gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); |
569 | 647 | ||
570 | gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), tpc_mask); | 648 | gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), |
649 | (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1))); | ||
571 | } | 650 | } |
572 | 651 | ||
573 | static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, | 652 | static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, |
@@ -2113,6 +2192,7 @@ void gv11b_init_gr(struct gpu_ops *gops) | |||
2113 | gops->gr.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask; | 2192 | gops->gr.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask; |
2114 | gops->gr.get_access_map = gr_gv11b_get_access_map; | 2193 | gops->gr.get_access_map = gr_gv11b_get_access_map; |
2115 | gops->gr.handle_sm_exception = gr_gv11b_handle_sm_exception; | 2194 | gops->gr.handle_sm_exception = gr_gv11b_handle_sm_exception; |
2195 | gops->gr.handle_gcc_exception = gr_gv11b_handle_gcc_exception; | ||
2116 | gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception; | 2196 | gops->gr.handle_tex_exception = gr_gv11b_handle_tex_exception; |
2117 | gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions; | 2197 | gops->gr.enable_gpc_exceptions = gr_gv11b_enable_gpc_exceptions; |
2118 | gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr; | 2198 | gops->gr.mask_hww_warp_esr = gv11b_mask_hww_warp_esr; |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index 5bcbe667..cf3842b6 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h | |||
@@ -45,6 +45,8 @@ struct gr_t19x { | |||
45 | struct gr_gp10b_ecc_stat sm_l1_data_uncorrected_err_count; | 45 | struct gr_gp10b_ecc_stat sm_l1_data_uncorrected_err_count; |
46 | struct gr_gp10b_ecc_stat sm_icache_corrected_err_count; | 46 | struct gr_gp10b_ecc_stat sm_icache_corrected_err_count; |
47 | struct gr_gp10b_ecc_stat sm_icache_uncorrected_err_count; | 47 | struct gr_gp10b_ecc_stat sm_icache_uncorrected_err_count; |
48 | struct gr_gp10b_ecc_stat gcc_l15_corrected_err_count; | ||
49 | struct gr_gp10b_ecc_stat gcc_l15_uncorrected_err_count; | ||
48 | } ecc_stats; | 50 | } ecc_stats; |
49 | }; | 51 | }; |
50 | 52 | ||
diff --git a/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c index 009e5716..39ae68eb 100644 --- a/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c +++ b/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c | |||
@@ -131,6 +131,8 @@ static struct device_attribute *dev_attr_sm_l1_data_ecc_corrected_err_count_arra | |||
131 | static struct device_attribute *dev_attr_sm_l1_data_ecc_uncorrected_err_count_array; | 131 | static struct device_attribute *dev_attr_sm_l1_data_ecc_uncorrected_err_count_array; |
132 | static struct device_attribute *dev_attr_sm_icache_ecc_corrected_err_count_array; | 132 | static struct device_attribute *dev_attr_sm_icache_ecc_corrected_err_count_array; |
133 | static struct device_attribute *dev_attr_sm_icache_ecc_uncorrected_err_count_array; | 133 | static struct device_attribute *dev_attr_sm_icache_ecc_uncorrected_err_count_array; |
134 | static struct device_attribute *dev_attr_gcc_l15_ecc_corrected_err_count_array; | ||
135 | static struct device_attribute *dev_attr_gcc_l15_ecc_uncorrected_err_count_array; | ||
134 | 136 | ||
135 | void gr_gv11b_create_sysfs(struct device *dev) | 137 | void gr_gv11b_create_sysfs(struct device *dev) |
136 | { | 138 | { |
@@ -193,6 +195,18 @@ void gr_gv11b_create_sysfs(struct device *dev) | |||
193 | &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count, | 195 | &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count, |
194 | dev_attr_sm_icache_ecc_uncorrected_err_count_array); | 196 | dev_attr_sm_icache_ecc_uncorrected_err_count_array); |
195 | 197 | ||
198 | error |= gr_gp10b_ecc_stat_create(dev, | ||
199 | 0, | ||
200 | "gcc_l15_ecc_corrected_err_count", | ||
201 | &g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count, | ||
202 | dev_attr_gcc_l15_ecc_corrected_err_count_array); | ||
203 | |||
204 | error |= gr_gp10b_ecc_stat_create(dev, | ||
205 | 0, | ||
206 | "gcc_l15_ecc_uncorrected_err_count", | ||
207 | &g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count, | ||
208 | dev_attr_gcc_l15_ecc_uncorrected_err_count_array); | ||
209 | |||
196 | if (error) | 210 | if (error) |
197 | dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); | 211 | dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); |
198 | } | 212 | } |
@@ -241,4 +255,14 @@ static void gr_gv11b_remove_sysfs(struct device *dev) | |||
241 | &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count, | 255 | &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count, |
242 | dev_attr_sm_icache_ecc_uncorrected_err_count_array); | 256 | dev_attr_sm_icache_ecc_uncorrected_err_count_array); |
243 | 257 | ||
258 | gr_gp10b_ecc_stat_remove(dev, | ||
259 | 0, | ||
260 | &g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count, | ||
261 | dev_attr_gcc_l15_ecc_corrected_err_count_array); | ||
262 | |||
263 | gr_gp10b_ecc_stat_remove(dev, | ||
264 | 0, | ||
265 | &g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count, | ||
266 | dev_attr_gcc_l15_ecc_uncorrected_err_count_array); | ||
267 | |||
244 | } | 268 | } |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h index 4ce69743..6f38cf5b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h | |||
@@ -3370,6 +3370,10 @@ static inline u32 gr_gpcs_gpccs_gpc_exception_en_r(void) | |||
3370 | { | 3370 | { |
3371 | return 0x0041ac94; | 3371 | return 0x0041ac94; |
3372 | } | 3372 | } |
/*
 * Build the GCC field of a GPC exception-enable value: the lowest bit of @v
 * is placed at bit 2; every other bit of @v is discarded.
 */
static inline u32 gr_gpcs_gpccs_gpc_exception_en_gcc_f(u32 v)
{
	const u32 gcc_en = v & 0x1U;

	return gcc_en << 2U;
}
3373 | static inline u32 gr_gpcs_gpccs_gpc_exception_en_tpc_f(u32 v) | 3377 | static inline u32 gr_gpcs_gpccs_gpc_exception_en_tpc_f(u32 v) |
3374 | { | 3378 | { |
3375 | return (v & 0xff) << 16; | 3379 | return (v & 0xff) << 16; |
@@ -3378,6 +3382,10 @@ static inline u32 gr_gpc0_gpccs_gpc_exception_r(void) | |||
3378 | { | 3382 | { |
3379 | return 0x00502c90; | 3383 | return 0x00502c90; |
3380 | } | 3384 | } |
/*
 * Extract the GCC field from a GPC exception register value @r:
 * returns bit 2 of @r as 0 or 1.
 */
static inline u32 gr_gpc0_gpccs_gpc_exception_gcc_v(u32 r)
{
	const u32 shifted = r >> 2U;

	return shifted & 0x1U;
}
3381 | static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_v(u32 r) | 3389 | static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_v(u32 r) |
3382 | { | 3390 | { |
3383 | return (r >> 16) & 0xff; | 3391 | return (r >> 16) & 0xff; |
@@ -3386,6 +3394,62 @@ static inline u32 gr_gpc0_gpccs_gpc_exception_tpc_0_pending_v(void) | |||
3386 | { | 3394 | { |
3387 | return 0x00000001; | 3395 | return 0x00000001; |
3388 | } | 3396 | } |
/*
 * Address of the GCC L1.5 ECC status register as seen for GPC0 (0x00501048).
 * Callers add a per-GPC offset to reach other GPCs.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_r(void)
{
	return 0x00501048U;
}
/*
 * Mask selecting the bank 0 corrected-error bit (bit 0) of a GCC L1.5 ECC
 * status value.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank0_m(void)
{
	return 0x1U << 0U;
}
/*
 * Mask selecting the bank 1 corrected-error bit (bit 1) of a GCC L1.5 ECC
 * status value.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank1_m(void)
{
	return 0x1U << 1U;
}
/*
 * Mask selecting the bank 0 uncorrected-error bit (bit 4) of a GCC L1.5 ECC
 * status value.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank0_m(void)
{
	return 0x1U << 4U;
}
/*
 * Mask selecting the bank 1 uncorrected-error bit (bit 5) of a GCC L1.5 ECC
 * status value.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_bank1_m(void)
{
	return 0x1U << 5U;
}
/*
 * Extract the corrected-error total-counter overflow flag from a GCC L1.5
 * ECC status value @r: returns bit 8 of @r as 0 or 1.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_total_counter_overflow_v(u32 r)
{
	const u32 shifted = r >> 8U;

	return shifted & 0x1U;
}
/*
 * Extract the uncorrected-error total-counter overflow flag from a GCC L1.5
 * ECC status value @r: returns bit 10 of @r as 0 or 1.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_uncorrected_err_total_counter_overflow_v(u32 r)
{
	const u32 shifted = r >> 10U;

	return shifted & 0x1U;
}
/*
 * Value written to the GCC L1.5 ECC status register to request the reset
 * task: only bit 30 set (0x40000000).
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_status_reset_task_f(void)
{
	return 0x40000000U;
}
/*
 * Address of the GCC L1.5 ECC corrected-error count register as seen for
 * GPC0 (0x0050104c). Callers add a per-GPC offset to reach other GPCs.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r(void)
{
	return 0x0050104cU;
}
/*
 * Width, in bits, of the "total" field of the corrected-error count
 * register (16).
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s(void)
{
	return 16U;
}
/*
 * Extract the "total" field from a corrected-error count register value @r:
 * the low 16 bits of @r.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_v(u32 r)
{
	const u32 shifted = r >> 0U;

	return shifted & 0xffffU;
}
/*
 * Address of the GCC L1.5 ECC uncorrected-error count register as seen for
 * GPC0 (0x00501054). Callers add a per-GPC offset to reach other GPCs.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r(void)
{
	return 0x00501054U;
}
/*
 * Width, in bits, of the "total" field of the uncorrected-error count
 * register (16).
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s(void)
{
	return 16U;
}
/*
 * Extract the "total" field from an uncorrected-error count register value
 * @r: the low 16 bits of @r.
 */
static inline u32 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_v(u32 r)
{
	const u32 shifted = r >> 0U;

	return shifted & 0xffffU;
}
3389 | static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_r(void) | 3453 | static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_r(void) |
3390 | { | 3454 | { |
3391 | return 0x00504508; | 3455 | return 0x00504508; |