diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 105 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 8 |
2 files changed, 84 insertions, 29 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 3c04c2e4..86cc0555 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c | |||
@@ -58,6 +58,41 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) | |||
58 | return valid; | 58 | return valid; |
59 | } | 59 | } |
60 | 60 | ||
61 | static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err, | ||
62 | u32 sed_status, | ||
63 | u32 ded_status, | ||
64 | u32 *count_to_adjust, | ||
65 | u32 opposite_count) | ||
66 | { | ||
67 | u32 over_count = 0; | ||
68 | |||
69 | sed_status >>= gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_b(); | ||
70 | ded_status >>= gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_b(); | ||
71 | |||
72 | /* One overcount for each partition on which a SBE occurred but not a | ||
73 | DBE (or vice-versa) */ | ||
74 | if (single_err) { | ||
75 | over_count = | ||
76 | hweight32(sed_status & ~ded_status); | ||
77 | } else { | ||
78 | over_count = | ||
79 | hweight32(ded_status & ~sed_status); | ||
80 | } | ||
81 | |||
82 | /* If both a SBE and a DBE occur on the same partition, then we have an | ||
83 | overcount for the subpartition if the opposite error counts are | ||
84 | zero. */ | ||
85 | if ((sed_status & ded_status) && (opposite_count == 0)) { | ||
86 | over_count += | ||
87 | hweight32(sed_status & ded_status); | ||
88 | } | ||
89 | |||
90 | if (*count_to_adjust > over_count) | ||
91 | *count_to_adjust -= over_count; | ||
92 | else | ||
93 | *count_to_adjust = 0; | ||
94 | } | ||
95 | |||
61 | static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, | 96 | static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, |
62 | bool *post_event, struct channel_gk20a *fault_ch) | 97 | bool *post_event, struct channel_gk20a *fault_ch) |
63 | { | 98 | { |
@@ -65,50 +100,62 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
65 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 100 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
66 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | 101 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
67 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | 102 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; |
68 | u32 lrf_ecc_status, shm_ecc_status; | 103 | u32 lrf_ecc_status, lrf_ecc_sed_status, lrf_ecc_ded_status; |
104 | u32 lrf_single_count_delta, lrf_double_count_delta; | ||
105 | u32 shm_ecc_status; | ||
69 | 106 | ||
70 | gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch); | 107 | gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch); |
71 | 108 | ||
72 | /* Check for LRF ECC errors. */ | 109 | /* Check for LRF ECC errors. */ |
73 | lrf_ecc_status = gk20a_readl(g, | 110 | lrf_ecc_status = gk20a_readl(g, |
74 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset); | 111 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset); |
75 | if ( (lrf_ecc_status & | 112 | lrf_ecc_sed_status = lrf_ecc_status & |
76 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f()) || | 113 | (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f() | |
77 | (lrf_ecc_status & | 114 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f() | |
78 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f()) || | 115 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f() | |
79 | (lrf_ecc_status & | 116 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f()); |
80 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f()) || | 117 | lrf_ecc_ded_status = lrf_ecc_status & |
81 | (lrf_ecc_status & | 118 | (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f() | |
82 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f()) ) { | 119 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f() | |
83 | 120 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f() | | |
121 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f()); | ||
122 | lrf_single_count_delta = | ||
123 | gk20a_readl(g, | ||
124 | gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + | ||
125 | offset); | ||
126 | lrf_double_count_delta = | ||
127 | gk20a_readl(g, | ||
128 | gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + | ||
129 | offset); | ||
130 | gk20a_writel(g, | ||
131 | gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset, | ||
132 | 0); | ||
133 | gk20a_writel(g, | ||
134 | gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset, | ||
135 | 0); | ||
136 | if (lrf_ecc_sed_status) { | ||
84 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, | 137 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, |
85 | "Single bit error detected in SM LRF!"); | 138 | "Single bit error detected in SM LRF!"); |
86 | 139 | ||
140 | gr_gp10b_sm_lrf_ecc_overcount_war(1, | ||
141 | lrf_ecc_sed_status, | ||
142 | lrf_ecc_ded_status, | ||
143 | &lrf_single_count_delta, | ||
144 | lrf_double_count_delta); | ||
87 | g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] += | 145 | g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] += |
88 | gk20a_readl(g, | 146 | lrf_single_count_delta; |
89 | gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset); | ||
90 | gk20a_writel(g, | ||
91 | gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset, | ||
92 | 0); | ||
93 | } | 147 | } |
94 | if ( (lrf_ecc_status & | 148 | if (lrf_ecc_ded_status) { |
95 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) || | ||
96 | (lrf_ecc_status & | ||
97 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f()) || | ||
98 | (lrf_ecc_status & | ||
99 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f()) || | ||
100 | (lrf_ecc_status & | ||
101 | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f()) ) { | ||
102 | |||
103 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, | 149 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, |
104 | "Double bit error detected in SM LRF!"); | 150 | "Double bit error detected in SM LRF!"); |
105 | 151 | ||
152 | gr_gp10b_sm_lrf_ecc_overcount_war(0, | ||
153 | lrf_ecc_sed_status, | ||
154 | lrf_ecc_ded_status, | ||
155 | &lrf_double_count_delta, | ||
156 | lrf_single_count_delta); | ||
106 | g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] += | 157 | g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] += |
107 | gk20a_readl(g, | 158 | lrf_double_count_delta; |
108 | gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset); | ||
109 | gk20a_writel(g, | ||
110 | gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset, | ||
111 | 0); | ||
112 | } | 159 | } |
113 | gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, | 160 | gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, |
114 | lrf_ecc_status); | 161 | lrf_ecc_status); |
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h index f7fd3b09..12d84716 100644 --- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | |||
@@ -470,6 +470,10 @@ static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r(void) | |||
470 | { | 470 | { |
471 | return 0x005046b8; | 471 | return 0x005046b8; |
472 | } | 472 | } |
473 | static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_b(void) | ||
474 | { | ||
475 | return 4; | ||
476 | } | ||
473 | static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f(void) | 477 | static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f(void) |
474 | { | 478 | { |
475 | return 0x10; | 479 | return 0x10; |
@@ -486,6 +490,10 @@ static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_ | |||
486 | { | 490 | { |
487 | return 0x80; | 491 | return 0x80; |
488 | } | 492 | } |
493 | static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_b(void) | ||
494 | { | ||
495 | return 8; | ||
496 | } | ||
489 | static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f(void) | 497 | static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f(void) |
490 | { | 498 | { |
491 | return 0x100; | 499 | return 0x100; |