summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/common/ecc.c
diff options
context:
space:
mode:
author: Richard Zhao <rizhao@nvidia.com> 2018-06-26 20:37:40 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-07-19 19:43:58 -0400
commit: 7f14aafc2c02eb0fab458324d0ba91a7fdea3086 (patch)
tree: cda9f48839fbde3444fde521a9b0069eb06cd81a /drivers/gpu/nvgpu/common/ecc.c
parent: 5ff1b3fe5a30c926e59a55ad25dd4daf430c8579 (diff)
gpu: nvgpu: rework ecc structure and sysfs
- Create common file common/ecc.c, which includes common functions for adding and removing ecc counters.
- Common code creates a list of all counters, which makes it easier to iterate over all counters.
- Add chip-specific files for adding ecc counters.
- Add Linux-specific file os/linux/ecc_sysfs.c to export counters to sysfs.
- Remove obsolete code.
- The MISRA violation for using snprintf is not solved; it is tracked with Jira NVGPU-859.

Jira NVGPUT-115

Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1763536
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/ecc.c')
-rw-r--r-- drivers/gpu/nvgpu/common/ecc.c 369
1 files changed, 369 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c
new file mode 100644
index 00000000..b850f09e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/ecc.c
@@ -0,0 +1,369 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include "gk20a/gk20a.h"
24
25static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat)
26{
27 struct nvgpu_ecc *ecc = &g->ecc;
28
29 nvgpu_init_list_node(&stat->node);
30
31 nvgpu_list_add_tail(&stat->node, &ecc->stats_list);
32 ecc->stats_count++;
33}
34
35static void nvgpu_ecc_init(struct gk20a *g)
36{
37 struct nvgpu_ecc *ecc = &g->ecc;
38
39 nvgpu_init_list_node(&ecc->stats_list);
40}
41
42int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
43 struct nvgpu_ecc_stat ***stat, const char *name)
44{
45 struct gr_gk20a *gr = &g->gr;
46 struct nvgpu_ecc_stat **stats;
47 u32 gpc, tpc;
48 int err = 0;
49
50 stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
51 if (stats == NULL) {
52 return -ENOMEM;
53 }
54 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
55 stats[gpc] = nvgpu_kzalloc(g,
56 sizeof(*stats[gpc]) * gr->gpc_tpc_count[gpc]);
57 if (stats[gpc] == NULL) {
58 err = -ENOMEM;
59 break;
60 }
61 }
62
63 if (err != 0) {
64 while (gpc-- != 0u) {
65 nvgpu_kfree(g, stats[gpc]);
66 }
67
68 nvgpu_kfree(g, stats);
69 return err;
70 }
71
72 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
73 for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
74 snprintf(stats[gpc][tpc].name,
75 NVGPU_ECC_STAT_NAME_MAX_SIZE,
76 "gpc%d_tpc%d_%s", gpc, tpc, name);
77 nvgpu_ecc_stat_add(g, &stats[gpc][tpc]);
78 }
79 }
80
81 *stat = stats;
82 return 0;
83}
84
85int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
86 struct nvgpu_ecc_stat **stat, const char *name)
87{
88 struct gr_gk20a *gr = &g->gr;
89 struct nvgpu_ecc_stat *stats;
90 u32 gpc;
91
92 stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
93 if (stats == NULL) {
94 return -ENOMEM;
95 }
96 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
97 snprintf(stats[gpc].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
98 "gpc%d_%s", gpc, name);
99 nvgpu_ecc_stat_add(g, &stats[gpc]);
100 }
101
102 *stat = stats;
103 return 0;
104}
105
106int nvgpu_ecc_counter_init(struct gk20a *g,
107 struct nvgpu_ecc_stat **stat, const char *name)
108{
109 struct nvgpu_ecc_stat *stats;
110
111 stats = nvgpu_kzalloc(g, sizeof(*stats));
112 if (stats == NULL) {
113 return -ENOMEM;
114 }
115
116 (void)strncpy(stats->name, name, NVGPU_ECC_STAT_NAME_MAX_SIZE - 1);
117 nvgpu_ecc_stat_add(g, stats);
118 *stat = stats;
119 return 0;
120}
121
122int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
123 struct nvgpu_ecc_stat ***stat, const char *name)
124{
125 struct gr_gk20a *gr = &g->gr;
126 struct nvgpu_ecc_stat **stats;
127 u32 ltc, lts;
128 int err = 0;
129
130 stats = nvgpu_kzalloc(g, sizeof(*stats) * g->ltc_count);
131 if (stats == NULL) {
132 return -ENOMEM;
133 }
134 for (ltc = 0; ltc < g->ltc_count; ltc++) {
135 stats[ltc] = nvgpu_kzalloc(g,
136 sizeof(*stats[ltc]) * gr->slices_per_ltc);
137 if (stats[ltc] == NULL) {
138 err = -ENOMEM;
139 break;
140 }
141 }
142
143 if (err != 0) {
144 while (ltc-- > 0u) {
145 nvgpu_kfree(g, stats[ltc]);
146 }
147
148 nvgpu_kfree(g, stats);
149 return err;
150 }
151
152 for (ltc = 0; ltc < g->ltc_count; ltc++) {
153 for (lts = 0; lts < gr->slices_per_ltc; lts++) {
154 snprintf(stats[ltc][lts].name,
155 NVGPU_ECC_STAT_NAME_MAX_SIZE,
156 "ltc%d_lts%d_%s", ltc, lts, name);
157 nvgpu_ecc_stat_add(g, &stats[ltc][lts]);
158 }
159 }
160
161 *stat = stats;
162 return 0;
163}
164
165int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
166 struct nvgpu_ecc_stat **stat, const char *name)
167{
168 int i;
169 int num_fbpa = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
170 struct nvgpu_ecc_stat *stats;
171
172 stats = nvgpu_kzalloc(g, sizeof(*stats) * num_fbpa);
173 if (stats == NULL) {
174 return -ENOMEM;
175 }
176
177 for (i = 0; i < num_fbpa; i++) {
178 snprintf(stats[i].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
179 "fbpa%d_%s", i, name);
180 nvgpu_ecc_stat_add(g, &stats[i]);
181 }
182
183 *stat = stats;
184 return 0;
185}
186
187/* release all ecc_stat */
188void nvgpu_ecc_free(struct gk20a *g)
189{
190 struct nvgpu_ecc *ecc = &g->ecc;
191 struct gr_gk20a *gr = &g->gr;
192 u32 i;
193
194 for (i = 0; i < gr->gpc_count; i++) {
195 if (ecc->gr.sm_lrf_ecc_single_err_count != NULL) {
196 nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count[i]);
197 }
198
199 if (ecc->gr.sm_lrf_ecc_double_err_count != NULL) {
200 nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count[i]);
201 }
202
203 if (ecc->gr.sm_shm_ecc_sec_count != NULL) {
204 nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count[i]);
205 }
206
207 if (ecc->gr.sm_shm_ecc_sed_count != NULL) {
208 nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count[i]);
209 }
210
211 if (ecc->gr.sm_shm_ecc_ded_count != NULL) {
212 nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count[i]);
213 }
214
215 if (ecc->gr.tex_ecc_total_sec_pipe0_count != NULL) {
216 nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count[i]);
217 }
218
219 if (ecc->gr.tex_ecc_total_ded_pipe0_count != NULL) {
220 nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count[i]);
221 }
222
223 if (ecc->gr.tex_unique_ecc_sec_pipe0_count != NULL) {
224 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count[i]);
225 }
226
227 if (ecc->gr.tex_unique_ecc_ded_pipe0_count != NULL) {
228 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count[i]);
229 }
230
231 if (ecc->gr.tex_ecc_total_sec_pipe1_count != NULL) {
232 nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count[i]);
233 }
234
235 if (ecc->gr.tex_ecc_total_ded_pipe1_count != NULL) {
236 nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count[i]);
237 }
238
239 if (ecc->gr.tex_unique_ecc_sec_pipe1_count != NULL) {
240 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count[i]);
241 }
242
243 if (ecc->gr.tex_unique_ecc_ded_pipe1_count != NULL) {
244 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count[i]);
245 }
246
247 if (ecc->gr.sm_l1_tag_ecc_corrected_err_count != NULL) {
248 nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count[i]);
249 }
250
251 if (ecc->gr.sm_l1_tag_ecc_uncorrected_err_count != NULL) {
252 nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count[i]);
253 }
254
255 if (ecc->gr.sm_cbu_ecc_corrected_err_count != NULL) {
256 nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count[i]);
257 }
258
259 if (ecc->gr.sm_cbu_ecc_uncorrected_err_count != NULL) {
260 nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count[i]);
261 }
262
263 if (ecc->gr.sm_l1_data_ecc_corrected_err_count != NULL) {
264 nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count[i]);
265 }
266
267 if (ecc->gr.sm_l1_data_ecc_uncorrected_err_count != NULL) {
268 nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count[i]);
269 }
270
271 if (ecc->gr.sm_icache_ecc_corrected_err_count != NULL) {
272 nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count[i]);
273 }
274
275 if (ecc->gr.sm_icache_ecc_uncorrected_err_count != NULL) {
276 nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count[i]);
277 }
278 }
279 nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count);
280 nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count);
281 nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count);
282 nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count);
283 nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count);
284 nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count);
285 nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count);
286 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count);
287 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count);
288 nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count);
289 nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count);
290 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count);
291 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count);
292 nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count);
293 nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count);
294 nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count);
295 nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count);
296 nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count);
297 nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count);
298 nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count);
299 nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count);
300
301 nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_corrected_err_count);
302 nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_uncorrected_err_count);
303 nvgpu_kfree(g, ecc->gr.gpccs_ecc_corrected_err_count);
304 nvgpu_kfree(g, ecc->gr.gpccs_ecc_uncorrected_err_count);
305 nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_corrected_err_count);
306 nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_uncorrected_err_count);
307 nvgpu_kfree(g, ecc->gr.fecs_ecc_corrected_err_count);
308 nvgpu_kfree(g, ecc->gr.fecs_ecc_uncorrected_err_count);
309
310 for (i = 0; i < g->ltc_count; i++) {
311 if (ecc->ltc.ecc_sec_count != NULL) {
312 nvgpu_kfree(g, ecc->ltc.ecc_sec_count[i]);
313 }
314
315 if (ecc->ltc.ecc_ded_count != NULL) {
316 nvgpu_kfree(g, ecc->ltc.ecc_ded_count[i]);
317 }
318 }
319 nvgpu_kfree(g, ecc->ltc.ecc_sec_count);
320 nvgpu_kfree(g, ecc->ltc.ecc_ded_count);
321
322 nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_corrected_err_count);
323 nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_uncorrected_err_count);
324 nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_corrected_err_count);
325 nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_uncorrected_err_count);
326 nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_corrected_err_count);
327 nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_uncorrected_err_count);
328
329 nvgpu_kfree(g, ecc->pmu.pmu_ecc_corrected_err_count);
330 nvgpu_kfree(g, ecc->pmu.pmu_ecc_uncorrected_err_count);
331
332 nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_sec_err_count);
333 nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_ded_err_count);
334
335 (void)memset(ecc, 0, sizeof(*ecc));
336}
337
338int nvgpu_ecc_init_support(struct gk20a *g)
339{
340 int err;
341
342 if (g->ops.gr.init_ecc == NULL) {
343 return 0;
344 }
345
346 nvgpu_ecc_init(g);
347 err = g->ops.gr.init_ecc(g);
348 if (err != 0) {
349 return err;
350 }
351
352 err = nvgpu_ecc_sysfs_init(g);
353 if (err != 0) {
354 nvgpu_ecc_free(g);
355 return err;
356 }
357
358 return 0;
359}
360
361void nvgpu_ecc_remove_support(struct gk20a *g)
362{
363 if (g->ops.gr.init_ecc == NULL) {
364 return;
365 }
366
367 nvgpu_ecc_sysfs_remove(g);
368 nvgpu_ecc_free(g);
369}