summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
diff options
context:
space:
mode:
author Adeel Raza <araza@nvidia.com> 2015-12-11 19:16:21 -0500
committer Deepak Nibade <dnibade@nvidia.com> 2016-12-27 04:52:10 -0500
commit e9b03e903c10e1fce9daf5fa7e51b8c4a0b65c95 (patch)
tree 7d55abac9face13a5753fc26a9817743606e8870 /drivers/gpu/nvgpu/gp10b/gr_gp10b.c
parent f7d327985fca67266ea409e24c0ef6505d98f338 (diff)
gpu: nvgpu: gp10b: add ECC stats sysfs nodes
Add sysfs nodes for querying ECC single/double bit error counts.

Bug 1699676

Change-Id: I6d5219facadaa17207ac759b88fe19077207d8f1
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/935363
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/gr_gp10b.c')
-rw-r--r-- drivers/gpu/nvgpu/gp10b/gr_gp10b.c 145
1 files changed, 145 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index c66dea92..90d0ce8d 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -80,6 +80,13 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
80 80
81 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, 81 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
82 "Single bit error detected in SM LRF!"); 82 "Single bit error detected in SM LRF!");
83
84 g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] +=
85 gk20a_readl(g,
86 gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset);
87 gk20a_writel(g,
88 gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset,
89 0);
83 } 90 }
84 if ( (lrf_ecc_status & 91 if ( (lrf_ecc_status &
85 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) || 92 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) ||
@@ -92,6 +99,13 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
92 99
93 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, 100 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
94 "Double bit error detected in SM LRF!"); 101 "Double bit error detected in SM LRF!");
102
103 g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] +=
104 gk20a_readl(g,
105 gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset);
106 gk20a_writel(g,
107 gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset,
108 0);
95 } 109 }
96 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, 110 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
97 lrf_ecc_status); 111 lrf_ecc_status);
@@ -107,17 +121,42 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
107 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) || 121 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) ||
108 (shm_ecc_status & 122 (shm_ecc_status &
109 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) ) { 123 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) ) {
124 u32 ecc_stats_reg_val;
110 125
111 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, 126 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
112 "Single bit error detected in SM SHM!"); 127 "Single bit error detected in SM SHM!");
128
129 ecc_stats_reg_val =
130 gk20a_readl(g,
131 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
132 g->gr.t18x.ecc_stats.sm_shm_sec_count.counters[tpc] +=
133 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val);
134 g->gr.t18x.ecc_stats.sm_shm_sed_count.counters[tpc] +=
135 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val);
136 ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() |
137 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m());
138 gk20a_writel(g,
139 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
140 ecc_stats_reg_val);
113 } 141 }
114 if ( (shm_ecc_status & 142 if ( (shm_ecc_status &
115 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) || 143 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) ||
116 (shm_ecc_status & 144 (shm_ecc_status &
117 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) ) { 145 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) ) {
146 u32 ecc_stats_reg_val;
118 147
119 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, 148 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
120 "Double bit error detected in SM SHM!"); 149 "Double bit error detected in SM SHM!");
150
151 ecc_stats_reg_val =
152 gk20a_readl(g,
153 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
154 g->gr.t18x.ecc_stats.sm_shm_ded_count.counters[tpc] +=
155 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val);
156 ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m());
157 gk20a_writel(g,
158 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset,
159 ecc_stats_reg_val);
121 } 160 }
122 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset, 161 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset,
123 shm_ecc_status); 162 shm_ecc_status);
@@ -133,6 +172,7 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
133 u32 offset = proj_gpc_stride_v() * gpc + 172 u32 offset = proj_gpc_stride_v() * gpc +
134 proj_tpc_in_gpc_stride_v() * tpc; 173 proj_tpc_in_gpc_stride_v() * tpc;
135 u32 esr; 174 u32 esr;
175 u32 ecc_stats_reg_val;
136 176
137 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); 177 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
138 178
@@ -143,10 +183,114 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
143 if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f()) { 183 if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f()) {
144 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, 184 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
145 "Single bit error detected in TEX!"); 185 "Single bit error detected in TEX!");
186
187 /* Pipe 0 counters */
188 gk20a_writel(g,
189 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
190 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f());
191
192 ecc_stats_reg_val = gk20a_readl(g,
193 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
194 g->gr.t18x.ecc_stats.tex_total_sec_pipe0_count.counters[tpc] +=
195 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
196 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
197 gk20a_writel(g,
198 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
199 ecc_stats_reg_val);
200
201 ecc_stats_reg_val = gk20a_readl(g,
202 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
203 g->gr.t18x.ecc_stats.tex_unique_sec_pipe0_count.counters[tpc] +=
204 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
205 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
206 gk20a_writel(g,
207 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
208 ecc_stats_reg_val);
209
210
211 /* Pipe 1 counters */
212 gk20a_writel(g,
213 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
214 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f());
215
216 ecc_stats_reg_val = gk20a_readl(g,
217 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
218 g->gr.t18x.ecc_stats.tex_total_sec_pipe1_count.counters[tpc] +=
219 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
220 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
221 gk20a_writel(g,
222 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
223 ecc_stats_reg_val);
224
225 ecc_stats_reg_val = gk20a_readl(g,
226 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
227 g->gr.t18x.ecc_stats.tex_unique_sec_pipe1_count.counters[tpc] +=
228 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
229 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
230 gk20a_writel(g,
231 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
232 ecc_stats_reg_val);
233
234
235 gk20a_writel(g,
236 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
237 gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f());
146 } 238 }
147 if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f()) { 239 if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f()) {
148 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, 240 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
149 "Double bit error detected in TEX!"); 241 "Double bit error detected in TEX!");
242
243 /* Pipe 0 counters */
244 gk20a_writel(g,
245 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
246 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f());
247
248 ecc_stats_reg_val = gk20a_readl(g,
249 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
250 g->gr.t18x.ecc_stats.tex_total_ded_pipe0_count.counters[tpc] +=
251 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
252 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
253 gk20a_writel(g,
254 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
255 ecc_stats_reg_val);
256
257 ecc_stats_reg_val = gk20a_readl(g,
258 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
259 g->gr.t18x.ecc_stats.tex_unique_ded_pipe0_count.counters[tpc] +=
260 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
261 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
262 gk20a_writel(g,
263 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
264 ecc_stats_reg_val);
265
266
267 /* Pipe 1 counters */
268 gk20a_writel(g,
269 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
270 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f());
271
272 ecc_stats_reg_val = gk20a_readl(g,
273 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
274 g->gr.t18x.ecc_stats.tex_total_ded_pipe1_count.counters[tpc] +=
275 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
276 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
277 gk20a_writel(g,
278 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset,
279 ecc_stats_reg_val);
280
281 ecc_stats_reg_val = gk20a_readl(g,
282 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
283 g->gr.t18x.ecc_stats.tex_unique_ded_pipe1_count.counters[tpc] +=
284 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
285 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
286 gk20a_writel(g,
287 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset,
288 ecc_stats_reg_val);
289
290
291 gk20a_writel(g,
292 gr_pri_gpc0_tpc0_tex_m_routing_r() + offset,
293 gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f());
150 } 294 }
151 295
152 gk20a_writel(g, 296 gk20a_writel(g,
@@ -1594,4 +1738,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
1594 gops->gr.pre_process_sm_exception = 1738 gops->gr.pre_process_sm_exception =
1595 gr_gp10b_pre_process_sm_exception; 1739 gr_gp10b_pre_process_sm_exception;
1596 gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error; 1740 gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
1741 gops->gr.create_gr_sysfs = gr_gp10b_create_sysfs;
1597} 1742}