summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard Zhao <rizhao@nvidia.com>2018-06-26 20:37:40 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-07-19 19:43:58 -0400
commit7f14aafc2c02eb0fab458324d0ba91a7fdea3086 (patch)
treecda9f48839fbde3444fde521a9b0069eb06cd81a
parent5ff1b3fe5a30c926e59a55ad25dd4daf430c8579 (diff)
gpu: nvgpu: rework ecc structure and sysfs
- create common file common/ecc.c which includes common functions for adding and removing ecc counters. - common code will create a list of all counters, which makes it easier to iterate over all counters. - add chip specific files for adding ecc counters. - add linux specific file os/linux/ecc_sysfs.c to export counters to sysfs. - remove obsolete code - MISRA violation for using snprintf is not solved, tracking with jira NVGPU-859 Jira NVGPUT-115 Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4 Signed-off-by: Richard Zhao <rizhao@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1763536 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/Makefile7
-rw-r--r--drivers/gpu/nvgpu/Makefile.sources3
-rw-r--r--drivers/gpu/nvgpu/common/ecc.c369
-rw-r--r--drivers/gpu/nvgpu/common/fb/fb_gv11b.c24
-rw-r--r--drivers/gpu/nvgpu/common/posix/stubs.c11
-rw-r--r--drivers/gpu/nvgpu/gk20a/ecc_gk20a.h102
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h8
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c8
-rw-r--r--drivers/gpu/nvgpu/gp106/hal_gp106.c4
-rw-r--r--drivers/gpu/nvgpu/gp10b/ecc_gp10b.c106
-rw-r--r--drivers/gpu/nvgpu/gp10b/ecc_gp10b.h28
-rw-r--r--drivers/gpu/nvgpu/gp10b/gr_gp10b.c26
-rw-r--r--drivers/gpu/nvgpu/gp10b/hal_gp10b.c6
-rw-r--r--drivers/gpu/nvgpu/gp10b/ltc_gp10b.c4
-rw-r--r--drivers/gpu/nvgpu/gv100/hal_gv100.c4
-rw-r--r--drivers/gpu/nvgpu/gv11b/ecc_gv11b.c181
-rw-r--r--drivers/gpu/nvgpu/gv11b/ecc_gv11b.h28
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c48
-rw-r--r--drivers/gpu/nvgpu/gv11b/hal_gv11b.c6
-rw-r--r--drivers/gpu/nvgpu/gv11b/ltc_gv11b.c10
-rw-r--r--drivers/gpu/nvgpu/gv11b/pmu_gv11b.c8
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/ecc.h162
-rw-r--r--drivers/gpu/nvgpu/os/linux/ecc_sysfs.c80
-rw-r--r--drivers/gpu/nvgpu/os/linux/os_linux.h1
-rw-r--r--drivers/gpu/nvgpu/os/linux/pci.c5
-rw-r--r--drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c269
-rw-r--r--drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h37
-rw-r--r--drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c165
-rw-r--r--drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h1
-rw-r--r--drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c331
-rw-r--r--drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c4
-rw-r--r--drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c4
32 files changed, 1044 insertions, 1006 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 72795e08..90858e55 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -69,7 +69,8 @@ nvgpu-y += \
69 os/linux/sim_pci.o \ 69 os/linux/sim_pci.o \
70 os/linux/os_sched.o \ 70 os/linux/os_sched.o \
71 os/linux/nvlink.o \ 71 os/linux/nvlink.o \
72 os/linux/dt.o 72 os/linux/dt.o \
73 os/linux/ecc_sysfs.o
73 74
74nvgpu-$(CONFIG_GK20A_VIDMEM) += \ 75nvgpu-$(CONFIG_GK20A_VIDMEM) += \
75 os/linux/dmabuf_vidmem.o 76 os/linux/dmabuf_vidmem.o
@@ -100,7 +101,6 @@ nvgpu-$(CONFIG_TEGRA_GK20A) += \
100 os/linux/module_usermode.o \ 101 os/linux/module_usermode.o \
101 os/linux/soc.o \ 102 os/linux/soc.o \
102 os/linux/fuse.o \ 103 os/linux/fuse.o \
103 os/linux/platform_ecc_sysfs.o \
104 os/linux/platform_gk20a_tegra.o \ 104 os/linux/platform_gk20a_tegra.o \
105 os/linux/platform_gp10b_tegra.o \ 105 os/linux/platform_gp10b_tegra.o \
106 os/linux/platform_gv11b_tegra.o 106 os/linux/platform_gv11b_tegra.o
@@ -185,6 +185,7 @@ nvgpu-y += \
185 common/sim.o \ 185 common/sim.o \
186 common/sim_pci.o \ 186 common/sim_pci.o \
187 common/fifo/submit.o \ 187 common/fifo/submit.o \
188 common/ecc.o \
188 gk20a/gk20a.o \ 189 gk20a/gk20a.o \
189 gk20a/ce2_gk20a.o \ 190 gk20a/ce2_gk20a.o \
190 gk20a/fifo_gk20a.o \ 191 gk20a/fifo_gk20a.o \
@@ -267,6 +268,7 @@ nvgpu-y += \
267 gp10b/priv_ring_gp10b.o \ 268 gp10b/priv_ring_gp10b.o \
268 gp10b/gp10b.o \ 269 gp10b/gp10b.o \
269 gp10b/fuse_gp10b.o \ 270 gp10b/fuse_gp10b.o \
271 gp10b/ecc_gp10b.o \
270 gp106/hal_gp106.o \ 272 gp106/hal_gp106.o \
271 gp106/mm_gp106.o \ 273 gp106/mm_gp106.o \
272 gp106/flcn_gp106.o \ 274 gp106/flcn_gp106.o \
@@ -296,6 +298,7 @@ nvgpu-y += \
296 gv11b/subctx_gv11b.o \ 298 gv11b/subctx_gv11b.o \
297 gv11b/regops_gv11b.o \ 299 gv11b/regops_gv11b.o \
298 gv11b/therm_gv11b.o \ 300 gv11b/therm_gv11b.o \
301 gv11b/ecc_gv11b.o \
299 gv100/mm_gv100.o \ 302 gv100/mm_gv100.o \
300 gv100/gr_ctx_gv100.o \ 303 gv100/gr_ctx_gv100.o \
301 gv100/bios_gv100.o \ 304 gv100/bios_gv100.o \
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index 55d7201c..8095f6ba 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -49,6 +49,7 @@ srcs := common/mm/nvgpu_allocator.c \
49 common/rbtree.c \ 49 common/rbtree.c \
50 common/ltc.c \ 50 common/ltc.c \
51 common/io_common.c \ 51 common/io_common.c \
52 common/ecc.c \
52 common/vbios/bios.c \ 53 common/vbios/bios.c \
53 common/falcon/falcon.c \ 54 common/falcon/falcon.c \
54 common/pmu/pmu.c \ 55 common/pmu/pmu.c \
@@ -166,6 +167,7 @@ srcs := common/mm/nvgpu_allocator.c \
166 gp10b/priv_ring_gp10b.c \ 167 gp10b/priv_ring_gp10b.c \
167 gp10b/gp10b.c \ 168 gp10b/gp10b.c \
168 gp10b/fuse_gp10b.c \ 169 gp10b/fuse_gp10b.c \
170 gp10b/ecc_gp10b.c \
169 gv11b/gv11b.c \ 171 gv11b/gv11b.c \
170 gv11b/dbg_gpu_gv11b.c \ 172 gv11b/dbg_gpu_gv11b.c \
171 gv11b/mc_gv11b.c \ 173 gv11b/mc_gv11b.c \
@@ -181,6 +183,7 @@ srcs := common/mm/nvgpu_allocator.c \
181 gv11b/subctx_gv11b.c \ 183 gv11b/subctx_gv11b.c \
182 gv11b/regops_gv11b.c \ 184 gv11b/regops_gv11b.c \
183 gv11b/therm_gv11b.c \ 185 gv11b/therm_gv11b.c \
186 gv11b/ecc_gv11b.c \
184 gp106/hal_gp106.c \ 187 gp106/hal_gp106.c \
185 gp106/mm_gp106.c \ 188 gp106/mm_gp106.c \
186 gp106/flcn_gp106.c \ 189 gp106/flcn_gp106.c \
diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c
new file mode 100644
index 00000000..b850f09e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/ecc.c
@@ -0,0 +1,369 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include "gk20a/gk20a.h"
24
25static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat)
26{
27 struct nvgpu_ecc *ecc = &g->ecc;
28
29 nvgpu_init_list_node(&stat->node);
30
31 nvgpu_list_add_tail(&stat->node, &ecc->stats_list);
32 ecc->stats_count++;
33}
34
35static void nvgpu_ecc_init(struct gk20a *g)
36{
37 struct nvgpu_ecc *ecc = &g->ecc;
38
39 nvgpu_init_list_node(&ecc->stats_list);
40}
41
42int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
43 struct nvgpu_ecc_stat ***stat, const char *name)
44{
45 struct gr_gk20a *gr = &g->gr;
46 struct nvgpu_ecc_stat **stats;
47 u32 gpc, tpc;
48 int err = 0;
49
50 stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
51 if (stats == NULL) {
52 return -ENOMEM;
53 }
54 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
55 stats[gpc] = nvgpu_kzalloc(g,
56 sizeof(*stats[gpc]) * gr->gpc_tpc_count[gpc]);
57 if (stats[gpc] == NULL) {
58 err = -ENOMEM;
59 break;
60 }
61 }
62
63 if (err != 0) {
64 while (gpc-- != 0u) {
65 nvgpu_kfree(g, stats[gpc]);
66 }
67
68 nvgpu_kfree(g, stats);
69 return err;
70 }
71
72 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
73 for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
74 snprintf(stats[gpc][tpc].name,
75 NVGPU_ECC_STAT_NAME_MAX_SIZE,
76 "gpc%d_tpc%d_%s", gpc, tpc, name);
77 nvgpu_ecc_stat_add(g, &stats[gpc][tpc]);
78 }
79 }
80
81 *stat = stats;
82 return 0;
83}
84
85int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
86 struct nvgpu_ecc_stat **stat, const char *name)
87{
88 struct gr_gk20a *gr = &g->gr;
89 struct nvgpu_ecc_stat *stats;
90 u32 gpc;
91
92 stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
93 if (stats == NULL) {
94 return -ENOMEM;
95 }
96 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
97 snprintf(stats[gpc].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
98 "gpc%d_%s", gpc, name);
99 nvgpu_ecc_stat_add(g, &stats[gpc]);
100 }
101
102 *stat = stats;
103 return 0;
104}
105
106int nvgpu_ecc_counter_init(struct gk20a *g,
107 struct nvgpu_ecc_stat **stat, const char *name)
108{
109 struct nvgpu_ecc_stat *stats;
110
111 stats = nvgpu_kzalloc(g, sizeof(*stats));
112 if (stats == NULL) {
113 return -ENOMEM;
114 }
115
116 (void)strncpy(stats->name, name, NVGPU_ECC_STAT_NAME_MAX_SIZE - 1);
117 nvgpu_ecc_stat_add(g, stats);
118 *stat = stats;
119 return 0;
120}
121
122int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
123 struct nvgpu_ecc_stat ***stat, const char *name)
124{
125 struct gr_gk20a *gr = &g->gr;
126 struct nvgpu_ecc_stat **stats;
127 u32 ltc, lts;
128 int err = 0;
129
130 stats = nvgpu_kzalloc(g, sizeof(*stats) * g->ltc_count);
131 if (stats == NULL) {
132 return -ENOMEM;
133 }
134 for (ltc = 0; ltc < g->ltc_count; ltc++) {
135 stats[ltc] = nvgpu_kzalloc(g,
136 sizeof(*stats[ltc]) * gr->slices_per_ltc);
137 if (stats[ltc] == NULL) {
138 err = -ENOMEM;
139 break;
140 }
141 }
142
143 if (err != 0) {
144 while (ltc-- > 0u) {
145 nvgpu_kfree(g, stats[ltc]);
146 }
147
148 nvgpu_kfree(g, stats);
149 return err;
150 }
151
152 for (ltc = 0; ltc < g->ltc_count; ltc++) {
153 for (lts = 0; lts < gr->slices_per_ltc; lts++) {
154 snprintf(stats[ltc][lts].name,
155 NVGPU_ECC_STAT_NAME_MAX_SIZE,
156 "ltc%d_lts%d_%s", ltc, lts, name);
157 nvgpu_ecc_stat_add(g, &stats[ltc][lts]);
158 }
159 }
160
161 *stat = stats;
162 return 0;
163}
164
165int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
166 struct nvgpu_ecc_stat **stat, const char *name)
167{
168 int i;
169 int num_fbpa = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
170 struct nvgpu_ecc_stat *stats;
171
172 stats = nvgpu_kzalloc(g, sizeof(*stats) * num_fbpa);
173 if (stats == NULL) {
174 return -ENOMEM;
175 }
176
177 for (i = 0; i < num_fbpa; i++) {
178 snprintf(stats[i].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
179 "fbpa%d_%s", i, name);
180 nvgpu_ecc_stat_add(g, &stats[i]);
181 }
182
183 *stat = stats;
184 return 0;
185}
186
187/* release all ecc_stat */
188void nvgpu_ecc_free(struct gk20a *g)
189{
190 struct nvgpu_ecc *ecc = &g->ecc;
191 struct gr_gk20a *gr = &g->gr;
192 u32 i;
193
194 for (i = 0; i < gr->gpc_count; i++) {
195 if (ecc->gr.sm_lrf_ecc_single_err_count != NULL) {
196 nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count[i]);
197 }
198
199 if (ecc->gr.sm_lrf_ecc_double_err_count != NULL) {
200 nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count[i]);
201 }
202
203 if (ecc->gr.sm_shm_ecc_sec_count != NULL) {
204 nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count[i]);
205 }
206
207 if (ecc->gr.sm_shm_ecc_sed_count != NULL) {
208 nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count[i]);
209 }
210
211 if (ecc->gr.sm_shm_ecc_ded_count != NULL) {
212 nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count[i]);
213 }
214
215 if (ecc->gr.tex_ecc_total_sec_pipe0_count != NULL) {
216 nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count[i]);
217 }
218
219 if (ecc->gr.tex_ecc_total_ded_pipe0_count != NULL) {
220 nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count[i]);
221 }
222
223 if (ecc->gr.tex_unique_ecc_sec_pipe0_count != NULL) {
224 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count[i]);
225 }
226
227 if (ecc->gr.tex_unique_ecc_ded_pipe0_count != NULL) {
228 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count[i]);
229 }
230
231 if (ecc->gr.tex_ecc_total_sec_pipe1_count != NULL) {
232 nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count[i]);
233 }
234
235 if (ecc->gr.tex_ecc_total_ded_pipe1_count != NULL) {
236 nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count[i]);
237 }
238
239 if (ecc->gr.tex_unique_ecc_sec_pipe1_count != NULL) {
240 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count[i]);
241 }
242
243 if (ecc->gr.tex_unique_ecc_ded_pipe1_count != NULL) {
244 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count[i]);
245 }
246
247 if (ecc->gr.sm_l1_tag_ecc_corrected_err_count != NULL) {
248 nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count[i]);
249 }
250
251 if (ecc->gr.sm_l1_tag_ecc_uncorrected_err_count != NULL) {
252 nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count[i]);
253 }
254
255 if (ecc->gr.sm_cbu_ecc_corrected_err_count != NULL) {
256 nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count[i]);
257 }
258
259 if (ecc->gr.sm_cbu_ecc_uncorrected_err_count != NULL) {
260 nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count[i]);
261 }
262
263 if (ecc->gr.sm_l1_data_ecc_corrected_err_count != NULL) {
264 nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count[i]);
265 }
266
267 if (ecc->gr.sm_l1_data_ecc_uncorrected_err_count != NULL) {
268 nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count[i]);
269 }
270
271 if (ecc->gr.sm_icache_ecc_corrected_err_count != NULL) {
272 nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count[i]);
273 }
274
275 if (ecc->gr.sm_icache_ecc_uncorrected_err_count != NULL) {
276 nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count[i]);
277 }
278 }
279 nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count);
280 nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count);
281 nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count);
282 nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count);
283 nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count);
284 nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count);
285 nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count);
286 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count);
287 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count);
288 nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count);
289 nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count);
290 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count);
291 nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count);
292 nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count);
293 nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count);
294 nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count);
295 nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count);
296 nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count);
297 nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count);
298 nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count);
299 nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count);
300
301 nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_corrected_err_count);
302 nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_uncorrected_err_count);
303 nvgpu_kfree(g, ecc->gr.gpccs_ecc_corrected_err_count);
304 nvgpu_kfree(g, ecc->gr.gpccs_ecc_uncorrected_err_count);
305 nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_corrected_err_count);
306 nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_uncorrected_err_count);
307 nvgpu_kfree(g, ecc->gr.fecs_ecc_corrected_err_count);
308 nvgpu_kfree(g, ecc->gr.fecs_ecc_uncorrected_err_count);
309
310 for (i = 0; i < g->ltc_count; i++) {
311 if (ecc->ltc.ecc_sec_count != NULL) {
312 nvgpu_kfree(g, ecc->ltc.ecc_sec_count[i]);
313 }
314
315 if (ecc->ltc.ecc_ded_count != NULL) {
316 nvgpu_kfree(g, ecc->ltc.ecc_ded_count[i]);
317 }
318 }
319 nvgpu_kfree(g, ecc->ltc.ecc_sec_count);
320 nvgpu_kfree(g, ecc->ltc.ecc_ded_count);
321
322 nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_corrected_err_count);
323 nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_uncorrected_err_count);
324 nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_corrected_err_count);
325 nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_uncorrected_err_count);
326 nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_corrected_err_count);
327 nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_uncorrected_err_count);
328
329 nvgpu_kfree(g, ecc->pmu.pmu_ecc_corrected_err_count);
330 nvgpu_kfree(g, ecc->pmu.pmu_ecc_uncorrected_err_count);
331
332 nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_sec_err_count);
333 nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_ded_err_count);
334
335 (void)memset(ecc, 0, sizeof(*ecc));
336}
337
338int nvgpu_ecc_init_support(struct gk20a *g)
339{
340 int err;
341
342 if (g->ops.gr.init_ecc == NULL) {
343 return 0;
344 }
345
346 nvgpu_ecc_init(g);
347 err = g->ops.gr.init_ecc(g);
348 if (err != 0) {
349 return err;
350 }
351
352 err = nvgpu_ecc_sysfs_init(g);
353 if (err != 0) {
354 nvgpu_ecc_free(g);
355 return err;
356 }
357
358 return 0;
359}
360
361void nvgpu_ecc_remove_support(struct gk20a *g)
362{
363 if (g->ops.gr.init_ecc == NULL) {
364 return;
365 }
366
367 nvgpu_ecc_sysfs_remove(g);
368 nvgpu_ecc_free(g);
369}
diff --git a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
index 26dabd72..53f04188 100644
--- a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
@@ -445,9 +445,9 @@ void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status)
445 uncorrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_uncorrected_err_count_total_s()); 445 uncorrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_uncorrected_err_count_total_s());
446 446
447 447
448 g->ecc.fb.mmu_l2tlb_corrected_err_count.counters[0] += 448 g->ecc.fb.mmu_l2tlb_ecc_corrected_err_count[0].counter +=
449 corrected_delta; 449 corrected_delta;
450 g->ecc.fb.mmu_l2tlb_uncorrected_err_count.counters[0] += 450 g->ecc.fb.mmu_l2tlb_ecc_uncorrected_err_count[0].counter +=
451 uncorrected_delta; 451 uncorrected_delta;
452 452
453 if (ecc_status & fb_mmu_l2tlb_ecc_status_corrected_err_l2tlb_sa_data_m()) 453 if (ecc_status & fb_mmu_l2tlb_ecc_status_corrected_err_l2tlb_sa_data_m())
@@ -461,8 +461,8 @@ void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status)
461 "ecc error address: 0x%x", ecc_addr); 461 "ecc error address: 0x%x", ecc_addr);
462 nvgpu_log(g, gpu_dbg_intr, 462 nvgpu_log(g, gpu_dbg_intr,
463 "ecc error count corrected: %d, uncorrected %d", 463 "ecc error count corrected: %d, uncorrected %d",
464 g->ecc.fb.mmu_l2tlb_corrected_err_count.counters[0], 464 g->ecc.fb.mmu_l2tlb_ecc_corrected_err_count[0].counter,
465 g->ecc.fb.mmu_l2tlb_uncorrected_err_count.counters[0]); 465 g->ecc.fb.mmu_l2tlb_ecc_uncorrected_err_count[0].counter);
466} 466}
467 467
468void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status) 468void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
@@ -503,9 +503,9 @@ void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
503 uncorrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_uncorrected_err_count_total_s()); 503 uncorrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_uncorrected_err_count_total_s());
504 504
505 505
506 g->ecc.fb.mmu_hubtlb_corrected_err_count.counters[0] += 506 g->ecc.fb.mmu_hubtlb_ecc_corrected_err_count[0].counter +=
507 corrected_delta; 507 corrected_delta;
508 g->ecc.fb.mmu_hubtlb_uncorrected_err_count.counters[0] += 508 g->ecc.fb.mmu_hubtlb_ecc_uncorrected_err_count[0].counter +=
509 uncorrected_delta; 509 uncorrected_delta;
510 510
511 if (ecc_status & fb_mmu_hubtlb_ecc_status_corrected_err_sa_data_m()) 511 if (ecc_status & fb_mmu_hubtlb_ecc_status_corrected_err_sa_data_m())
@@ -519,8 +519,8 @@ void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status)
519 "ecc error address: 0x%x", ecc_addr); 519 "ecc error address: 0x%x", ecc_addr);
520 nvgpu_log(g, gpu_dbg_intr, 520 nvgpu_log(g, gpu_dbg_intr,
521 "ecc error count corrected: %d, uncorrected %d", 521 "ecc error count corrected: %d, uncorrected %d",
522 g->ecc.fb.mmu_hubtlb_corrected_err_count.counters[0], 522 g->ecc.fb.mmu_hubtlb_ecc_corrected_err_count[0].counter,
523 g->ecc.fb.mmu_hubtlb_uncorrected_err_count.counters[0]); 523 g->ecc.fb.mmu_hubtlb_ecc_uncorrected_err_count[0].counter);
524} 524}
525 525
526void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status) 526void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
@@ -561,9 +561,9 @@ void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
561 uncorrected_delta += (0x1UL << fb_mmu_fillunit_ecc_uncorrected_err_count_total_s()); 561 uncorrected_delta += (0x1UL << fb_mmu_fillunit_ecc_uncorrected_err_count_total_s());
562 562
563 563
564 g->ecc.fb.mmu_fillunit_corrected_err_count.counters[0] += 564 g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter +=
565 corrected_delta; 565 corrected_delta;
566 g->ecc.fb.mmu_fillunit_uncorrected_err_count.counters[0] += 566 g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter +=
567 uncorrected_delta; 567 uncorrected_delta;
568 568
569 if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pte_data_m()) 569 if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pte_data_m())
@@ -582,8 +582,8 @@ void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status)
582 "ecc error address: 0x%x", ecc_addr); 582 "ecc error address: 0x%x", ecc_addr);
583 nvgpu_log(g, gpu_dbg_intr, 583 nvgpu_log(g, gpu_dbg_intr,
584 "ecc error count corrected: %d, uncorrected %d", 584 "ecc error count corrected: %d, uncorrected %d",
585 g->ecc.fb.mmu_fillunit_corrected_err_count.counters[0], 585 g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter,
586 g->ecc.fb.mmu_fillunit_uncorrected_err_count.counters[0]); 586 g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter);
587} 587}
588 588
589static void gv11b_fb_parse_mmfault(struct mmu_fault_info *mmfault) 589static void gv11b_fb_parse_mmfault(struct mmu_fault_info *mmfault)
diff --git a/drivers/gpu/nvgpu/common/posix/stubs.c b/drivers/gpu/nvgpu/common/posix/stubs.c
index d6270692..0fa80bff 100644
--- a/drivers/gpu/nvgpu/common/posix/stubs.c
+++ b/drivers/gpu/nvgpu/common/posix/stubs.c
@@ -25,8 +25,19 @@
25 * for an implementation. 25 * for an implementation.
26 */ 26 */
27 27
28#include <nvgpu/ecc.h>
29
28#include "gk20a/dbg_gpu_gk20a.h" 30#include "gk20a/dbg_gpu_gk20a.h"
29 31
30void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s) 32void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s)
31{ 33{
32} 34}
35
/* Stub: nothing is exported in this build, so always report success. */
int nvgpu_ecc_sysfs_init(struct gk20a *g)
{
	(void)g;

	return 0;
}
40
/* Stub: nvgpu_ecc_sysfs_init() exported nothing, so there is nothing to remove. */
void nvgpu_ecc_sysfs_remove(struct gk20a *g)
{
	(void)g;
}
diff --git a/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h b/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h
deleted file mode 100644
index 9c50a809..00000000
--- a/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h
+++ /dev/null
@@ -1,102 +0,0 @@
1/*
2 * GK20A ECC
3 *
4 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef ECC_GK20A_H
25#define ECC_GK20A_H
26
27struct gk20a_ecc_stat {
28 char **names;
29 u32 *counters;
30 u32 count;
31#ifdef CONFIG_SYSFS
32 struct hlist_node hash_node;
33 struct device_attribute *attr_array;
34#endif
35};
36
37struct ecc_gk20a {
38 /* Stats per engine */
39 struct {
40 struct gk20a_ecc_stat sm_lrf_single_err_count;
41 struct gk20a_ecc_stat sm_lrf_double_err_count;
42
43 struct gk20a_ecc_stat sm_shm_sec_count;
44 struct gk20a_ecc_stat sm_shm_sed_count;
45 struct gk20a_ecc_stat sm_shm_ded_count;
46
47 struct gk20a_ecc_stat tex_total_sec_pipe0_count;
48 struct gk20a_ecc_stat tex_total_ded_pipe0_count;
49 struct gk20a_ecc_stat tex_unique_sec_pipe0_count;
50 struct gk20a_ecc_stat tex_unique_ded_pipe0_count;
51 struct gk20a_ecc_stat tex_total_sec_pipe1_count;
52 struct gk20a_ecc_stat tex_total_ded_pipe1_count;
53 struct gk20a_ecc_stat tex_unique_sec_pipe1_count;
54 struct gk20a_ecc_stat tex_unique_ded_pipe1_count;
55
56 struct gk20a_ecc_stat sm_l1_tag_corrected_err_count;
57 struct gk20a_ecc_stat sm_l1_tag_uncorrected_err_count;
58 struct gk20a_ecc_stat sm_cbu_corrected_err_count;
59 struct gk20a_ecc_stat sm_cbu_uncorrected_err_count;
60 struct gk20a_ecc_stat sm_l1_data_corrected_err_count;
61 struct gk20a_ecc_stat sm_l1_data_uncorrected_err_count;
62 struct gk20a_ecc_stat sm_icache_corrected_err_count;
63 struct gk20a_ecc_stat sm_icache_uncorrected_err_count;
64 struct gk20a_ecc_stat gcc_l15_corrected_err_count;
65 struct gk20a_ecc_stat gcc_l15_uncorrected_err_count;
66 struct gk20a_ecc_stat fecs_corrected_err_count;
67 struct gk20a_ecc_stat fecs_uncorrected_err_count;
68 struct gk20a_ecc_stat gpccs_corrected_err_count;
69 struct gk20a_ecc_stat gpccs_uncorrected_err_count;
70 struct gk20a_ecc_stat mmu_l1tlb_corrected_err_count;
71 struct gk20a_ecc_stat mmu_l1tlb_uncorrected_err_count;
72 } gr;
73
74 struct {
75 struct gk20a_ecc_stat l2_sec_count;
76 struct gk20a_ecc_stat l2_ded_count;
77 struct gk20a_ecc_stat l2_cache_corrected_err_count;
78 struct gk20a_ecc_stat l2_cache_uncorrected_err_count;
79 } ltc;
80
81 struct {
82 struct gk20a_ecc_stat mmu_l2tlb_corrected_err_count;
83 struct gk20a_ecc_stat mmu_l2tlb_uncorrected_err_count;
84 struct gk20a_ecc_stat mmu_hubtlb_corrected_err_count;
85 struct gk20a_ecc_stat mmu_hubtlb_uncorrected_err_count;
86 struct gk20a_ecc_stat mmu_fillunit_corrected_err_count;
87 struct gk20a_ecc_stat mmu_fillunit_uncorrected_err_count;
88 } fb;
89
90 struct {
91 struct gk20a_ecc_stat pmu_corrected_err_count;
92 struct gk20a_ecc_stat pmu_uncorrected_err_count;
93 } pmu;
94
95 struct {
96 struct gk20a_ecc_stat fbpa_sec_err_count;
97 struct gk20a_ecc_stat fbpa_ded_err_count;
98 } fbpa;
99
100};
101
102#endif /*__ECC_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 7cb8462f..e69036d7 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -35,7 +35,6 @@ struct gk20a_ctxsw_trace;
35struct acr_desc; 35struct acr_desc;
36struct nvgpu_mem_alloc_tracker; 36struct nvgpu_mem_alloc_tracker;
37struct dbg_profiler_object_data; 37struct dbg_profiler_object_data;
38struct ecc_gk20a;
39struct gk20a_debug_output; 38struct gk20a_debug_output;
40struct nvgpu_clk_pll_debug_data; 39struct nvgpu_clk_pll_debug_data;
41struct nvgpu_nvhost_dev; 40struct nvgpu_nvhost_dev;
@@ -64,6 +63,7 @@ struct nvgpu_ctxsw_trace_filter;
64#include <nvgpu/clk_arb.h> 63#include <nvgpu/clk_arb.h>
65#include <nvgpu/nvlink.h> 64#include <nvgpu/nvlink.h>
66#include <nvgpu/sim.h> 65#include <nvgpu/sim.h>
66#include <nvgpu/ecc.h>
67 67
68#include "clk_gk20a.h" 68#include "clk_gk20a.h"
69#include "ce2_gk20a.h" 69#include "ce2_gk20a.h"
@@ -77,7 +77,6 @@ struct nvgpu_ctxsw_trace_filter;
77#include "perf/perf.h" 77#include "perf/perf.h"
78#include "pmgr/pmgr.h" 78#include "pmgr/pmgr.h"
79#include "therm/thrm.h" 79#include "therm/thrm.h"
80#include "ecc_gk20a.h"
81 80
82/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. 81/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds.
83 32 ns is the resolution of ptimer. */ 82 32 ns is the resolution of ptimer. */
@@ -384,8 +383,7 @@ struct gpu_ops {
384 u32 gpc_exception); 383 u32 gpc_exception);
385 void (*enable_gpc_exceptions)(struct gk20a *g); 384 void (*enable_gpc_exceptions)(struct gk20a *g);
386 void (*enable_exceptions)(struct gk20a *g); 385 void (*enable_exceptions)(struct gk20a *g);
387 void (*create_gr_sysfs)(struct gk20a *g); 386 int (*init_ecc)(struct gk20a *g);
388 void (*remove_gr_sysfs)(struct gk20a *g);
389 u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g); 387 u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
390 int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc, 388 int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc,
391 u32 sm, struct channel_gk20a *fault_ch); 389 u32 sm, struct channel_gk20a *fault_ch);
@@ -1385,7 +1383,7 @@ struct gk20a {
1385 struct mm_gk20a mm; 1383 struct mm_gk20a mm;
1386 struct nvgpu_pmu pmu; 1384 struct nvgpu_pmu pmu;
1387 struct acr_desc acr; 1385 struct acr_desc acr;
1388 struct ecc_gk20a ecc; 1386 struct nvgpu_ecc ecc;
1389 struct clk_pmupstate clk_pmu; 1387 struct clk_pmupstate clk_pmu;
1390 struct perf_pmupstate perf_pmu; 1388 struct perf_pmupstate perf_pmu;
1391 struct pmgr_pmupstate pmgr_pmu; 1389 struct pmgr_pmupstate pmgr_pmu;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index c70c1cd4..38570041 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -38,6 +38,7 @@
38#include <nvgpu/mm.h> 38#include <nvgpu/mm.h>
39#include <nvgpu/ctxsw_trace.h> 39#include <nvgpu/ctxsw_trace.h>
40#include <nvgpu/error_notifier.h> 40#include <nvgpu/error_notifier.h>
41#include <nvgpu/ecc.h>
41 42
42#include "gk20a.h" 43#include "gk20a.h"
43#include "gr_gk20a.h" 44#include "gr_gk20a.h"
@@ -3127,6 +3128,8 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3127 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; 3128 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
3128 3129
3129 gk20a_comptag_allocator_destroy(g, &gr->comp_tags); 3130 gk20a_comptag_allocator_destroy(g, &gr->comp_tags);
3131
3132 nvgpu_ecc_remove_support(g);
3130} 3133}
3131 3134
3132static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) 3135static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
@@ -4872,8 +4875,9 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g)
4872 gr->remove_support = gk20a_remove_gr_support; 4875 gr->remove_support = gk20a_remove_gr_support;
4873 gr->sw_ready = true; 4876 gr->sw_ready = true;
4874 4877
4875 if (g->ops.gr.create_gr_sysfs) 4878 err = nvgpu_ecc_init_support(g);
4876 g->ops.gr.create_gr_sysfs(g); 4879 if (err)
4880 goto clean_up;
4877 4881
4878 nvgpu_log_fn(g, "done"); 4882 nvgpu_log_fn(g, "done");
4879 return 0; 4883 return 0;
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 9490ec10..eb150ce8 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -382,10 +382,6 @@ static const struct gpu_ops gp106_ops = {
382 .update_boosted_ctx = NULL, 382 .update_boosted_ctx = NULL,
383 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, 383 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
384 .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, 384 .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
385#ifdef CONFIG_SYSFS
386 .create_gr_sysfs = NULL,
387 .remove_gr_sysfs = NULL,
388#endif
389 .set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode, 385 .set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode,
390 .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, 386 .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
391 .fecs_host_int_enable = gr_gk20a_fecs_host_int_enable, 387 .fecs_host_int_enable = gr_gk20a_fecs_host_int_enable,
diff --git a/drivers/gpu/nvgpu/gp10b/ecc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ecc_gp10b.c
new file mode 100644
index 00000000..cf95c0d7
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/ecc_gp10b.c
@@ -0,0 +1,106 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/ecc.h>
24
25#include "gk20a/gk20a.h"
26#include "gp10b/ecc_gp10b.h"
27
28int gp10b_ecc_init(struct gk20a *g)
29{
30 int err = 0;
31
32 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count);
33 if (err != 0) {
34 goto done;
35 }
36 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count);
37 if (err != 0) {
38 goto done;
39 }
40
41 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sec_count);
42 if (err != 0) {
43 goto done;
44 }
45 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sed_count);
46 if (err != 0) {
47 goto done;
48 }
49 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_ded_count);
50 if (err != 0) {
51 goto done;
52 }
53
54 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe0_count);
55 if (err != 0) {
56 goto done;
57 }
58 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe0_count);
59 if (err != 0) {
60 goto done;
61 }
62
63 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe0_count);
64 if (err != 0) {
65 goto done;
66 }
67 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe0_count);
68 if (err != 0) {
69 goto done;
70 }
71
72 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe1_count);
73 if (err != 0) {
74 goto done;
75 }
76 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe1_count);
77 if (err != 0) {
78 goto done;
79 }
80
81 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe1_count);
82 if (err != 0) {
83 goto done;
84 }
85 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe1_count);
86 if (err != 0) {
87 goto done;
88 }
89
90 err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count);
91 if (err != 0) {
92 goto done;
93 }
94 err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count);
95 if (err != 0) {
96 goto done;
97 }
98
99done:
100 if (err != 0) {
101 nvgpu_err(g, "ecc counter allocate failed, err=%d", err);
102 nvgpu_ecc_free(g);
103 }
104
105 return err;
106}
diff --git a/drivers/gpu/nvgpu/gp10b/ecc_gp10b.h b/drivers/gpu/nvgpu/gp10b/ecc_gp10b.h
new file mode 100644
index 00000000..e5101db0
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/ecc_gp10b.h
@@ -0,0 +1,28 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
#ifndef __ECC_GP10B_H__
#define __ECC_GP10B_H__

/*
 * Forward declaration so this header is self-contained: without it, the
 * struct tag in the prototype below would get function-prototype scope when
 * this header is included before the definition of struct gk20a, making the
 * prototype incompatible with the real function.
 */
struct gk20a;

/*
 * Initialise the gp10b ECC counters for @g.
 *
 * Returns 0 on success or a non-zero error code on failure.
 */
int gp10b_ecc_init(struct gk20a *g);

#endif
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 16eddeca..17c4e8b7 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -176,7 +176,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
176 lrf_ecc_ded_status, 176 lrf_ecc_ded_status,
177 &lrf_single_count_delta, 177 &lrf_single_count_delta,
178 lrf_double_count_delta); 178 lrf_double_count_delta);
179 g->ecc.gr.sm_lrf_single_err_count.counters[tpc] += 179 g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter +=
180 lrf_single_count_delta; 180 lrf_single_count_delta;
181 } 181 }
182 if (lrf_ecc_ded_status) { 182 if (lrf_ecc_ded_status) {
@@ -188,7 +188,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
188 lrf_ecc_ded_status, 188 lrf_ecc_ded_status,
189 &lrf_double_count_delta, 189 &lrf_double_count_delta,
190 lrf_single_count_delta); 190 lrf_single_count_delta);
191 g->ecc.gr.sm_lrf_double_err_count.counters[tpc] += 191 g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
192 lrf_double_count_delta; 192 lrf_double_count_delta;
193 } 193 }
194 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, 194 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
@@ -213,9 +213,9 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
213 ecc_stats_reg_val = 213 ecc_stats_reg_val =
214 gk20a_readl(g, 214 gk20a_readl(g,
215 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); 215 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
216 g->ecc.gr.sm_shm_sec_count.counters[tpc] += 216 g->ecc.gr.sm_shm_ecc_sec_count[gpc][tpc].counter +=
217 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val); 217 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val);
218 g->ecc.gr.sm_shm_sed_count.counters[tpc] += 218 g->ecc.gr.sm_shm_ecc_sed_count[gpc][tpc].counter +=
219 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val); 219 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val);
220 ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() | 220 ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() |
221 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m()); 221 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m());
@@ -235,7 +235,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g,
235 ecc_stats_reg_val = 235 ecc_stats_reg_val =
236 gk20a_readl(g, 236 gk20a_readl(g,
237 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); 237 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset);
238 g->ecc.gr.sm_shm_ded_count.counters[tpc] += 238 g->ecc.gr.sm_shm_ecc_ded_count[gpc][tpc].counter +=
239 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val); 239 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val);
240 ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m()); 240 ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m());
241 gk20a_writel(g, 241 gk20a_writel(g,
@@ -276,7 +276,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
276 276
277 ecc_stats_reg_val = gk20a_readl(g, 277 ecc_stats_reg_val = gk20a_readl(g,
278 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); 278 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
279 g->ecc.gr.tex_total_sec_pipe0_count.counters[tpc] += 279 g->ecc.gr.tex_ecc_total_sec_pipe0_count[gpc][tpc].counter +=
280 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val); 280 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
281 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(); 281 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
282 gk20a_writel(g, 282 gk20a_writel(g,
@@ -285,7 +285,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
285 285
286 ecc_stats_reg_val = gk20a_readl(g, 286 ecc_stats_reg_val = gk20a_readl(g,
287 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); 287 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
288 g->ecc.gr.tex_unique_sec_pipe0_count.counters[tpc] += 288 g->ecc.gr.tex_unique_ecc_sec_pipe0_count[gpc][tpc].counter +=
289 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val); 289 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
290 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(); 290 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
291 gk20a_writel(g, 291 gk20a_writel(g,
@@ -300,7 +300,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
300 300
301 ecc_stats_reg_val = gk20a_readl(g, 301 ecc_stats_reg_val = gk20a_readl(g,
302 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); 302 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
303 g->ecc.gr.tex_total_sec_pipe1_count.counters[tpc] += 303 g->ecc.gr.tex_ecc_total_sec_pipe1_count[gpc][tpc].counter +=
304 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val); 304 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val);
305 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(); 305 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m();
306 gk20a_writel(g, 306 gk20a_writel(g,
@@ -309,7 +309,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
309 309
310 ecc_stats_reg_val = gk20a_readl(g, 310 ecc_stats_reg_val = gk20a_readl(g,
311 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); 311 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
312 g->ecc.gr.tex_unique_sec_pipe1_count.counters[tpc] += 312 g->ecc.gr.tex_unique_ecc_sec_pipe1_count[gpc][tpc].counter +=
313 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val); 313 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val);
314 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(); 314 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m();
315 gk20a_writel(g, 315 gk20a_writel(g,
@@ -332,7 +332,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
332 332
333 ecc_stats_reg_val = gk20a_readl(g, 333 ecc_stats_reg_val = gk20a_readl(g,
334 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); 334 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
335 g->ecc.gr.tex_total_ded_pipe0_count.counters[tpc] += 335 g->ecc.gr.tex_ecc_total_ded_pipe0_count[gpc][tpc].counter +=
336 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val); 336 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
337 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(); 337 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
338 gk20a_writel(g, 338 gk20a_writel(g,
@@ -341,7 +341,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
341 341
342 ecc_stats_reg_val = gk20a_readl(g, 342 ecc_stats_reg_val = gk20a_readl(g,
343 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); 343 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
344 g->ecc.gr.tex_unique_ded_pipe0_count.counters[tpc] += 344 g->ecc.gr.tex_unique_ecc_ded_pipe0_count[gpc][tpc].counter +=
345 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val); 345 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
346 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); 346 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
347 gk20a_writel(g, 347 gk20a_writel(g,
@@ -356,7 +356,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
356 356
357 ecc_stats_reg_val = gk20a_readl(g, 357 ecc_stats_reg_val = gk20a_readl(g,
358 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); 358 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset);
359 g->ecc.gr.tex_total_ded_pipe1_count.counters[tpc] += 359 g->ecc.gr.tex_ecc_total_ded_pipe1_count[gpc][tpc].counter +=
360 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val); 360 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val);
361 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(); 361 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m();
362 gk20a_writel(g, 362 gk20a_writel(g,
@@ -365,7 +365,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
365 365
366 ecc_stats_reg_val = gk20a_readl(g, 366 ecc_stats_reg_val = gk20a_readl(g,
367 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); 367 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset);
368 g->ecc.gr.tex_unique_ded_pipe1_count.counters[tpc] += 368 g->ecc.gr.tex_unique_ecc_ded_pipe1_count[gpc][tpc].counter +=
369 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val); 369 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val);
370 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); 370 ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m();
371 gk20a_writel(g, 371 gk20a_writel(g,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 94adf727..d32f644d 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -56,6 +56,7 @@
56#include "gp10b/regops_gp10b.h" 56#include "gp10b/regops_gp10b.h"
57#include "gp10b/therm_gp10b.h" 57#include "gp10b/therm_gp10b.h"
58#include "gp10b/priv_ring_gp10b.h" 58#include "gp10b/priv_ring_gp10b.h"
59#include "gp10b/ecc_gp10b.h"
59 60
60#include "gm20b/ltc_gm20b.h" 61#include "gm20b/ltc_gm20b.h"
61#include "gm20b/gr_gm20b.h" 62#include "gm20b/gr_gm20b.h"
@@ -339,11 +340,8 @@ static const struct gpu_ops gp10b_ops = {
339 .init_preemption_state = gr_gp10b_init_preemption_state, 340 .init_preemption_state = gr_gp10b_init_preemption_state,
340 .update_boosted_ctx = gr_gp10b_update_boosted_ctx, 341 .update_boosted_ctx = gr_gp10b_update_boosted_ctx,
341 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, 342 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
342#ifdef CONFIG_SYSFS
343 .create_gr_sysfs = gr_gp10b_create_sysfs,
344 .remove_gr_sysfs = gr_gp10b_remove_sysfs,
345#endif
346 .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode, 343 .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
344 .init_ecc = gp10b_ecc_init,
347 .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, 345 .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
348 .init_gfxp_wfi_timeout_count = 346 .init_gfxp_wfi_timeout_count =
349 gr_gp10b_init_gfxp_wfi_timeout_count, 347 gr_gp10b_init_gfxp_wfi_timeout_count,
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 1e5807d5..aeeda4a8 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -249,7 +249,7 @@ void gp10b_ltc_isr(struct gk20a *g)
249 ecc_stats_reg_val = 249 ecc_stats_reg_val =
250 gk20a_readl(g, 250 gk20a_readl(g,
251 ltc_ltc0_lts0_dstg_ecc_report_r() + offset); 251 ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
252 g->ecc.ltc.l2_sec_count.counters[ltc*g->ltc_count + slice] += 252 g->ecc.ltc.ecc_sec_count[ltc][slice].counter +=
253 ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val); 253 ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val);
254 ecc_stats_reg_val &= 254 ecc_stats_reg_val &=
255 ~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m()); 255 ~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m());
@@ -268,7 +268,7 @@ void gp10b_ltc_isr(struct gk20a *g)
268 ecc_stats_reg_val = 268 ecc_stats_reg_val =
269 gk20a_readl(g, 269 gk20a_readl(g,
270 ltc_ltc0_lts0_dstg_ecc_report_r() + offset); 270 ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
271 g->ecc.ltc.l2_ded_count.counters[ltc*g->ltc_count + slice] += 271 g->ecc.ltc.ecc_ded_count[ltc][slice].counter +=
272 ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val); 272 ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val);
273 ecc_stats_reg_val &= 273 ecc_stats_reg_val &=
274 ~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m()); 274 ~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m());
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index fdbbef36..6134dedc 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -415,10 +415,6 @@ static const struct gpu_ops gv100_ops = {
415 .update_boosted_ctx = gr_gp10b_update_boosted_ctx, 415 .update_boosted_ctx = gr_gp10b_update_boosted_ctx,
416 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, 416 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
417 .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, 417 .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
418#ifdef CONFIG_SYSFS
419 .create_gr_sysfs = gr_gv11b_create_sysfs,
420 .remove_gr_sysfs = gr_gv11b_remove_sysfs,
421#endif
422 .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode, 418 .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
423 .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, 419 .is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
424 .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table, 420 .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
diff --git a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.c b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.c
new file mode 100644
index 00000000..6e29bf94
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.c
@@ -0,0 +1,181 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/ecc.h>
24
25#include "gk20a/gk20a.h"
26#include "gv11b/ecc_gv11b.h"
27
28int gv11b_ecc_init(struct gk20a *g)
29{
30 int err;
31
32 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count);
33 if (err != 0) {
34 goto done;
35 }
36 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count);
37 if (err != 0) {
38 goto done;
39 }
40
41 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
42 sm_l1_tag_ecc_corrected_err_count);
43 if (err != 0) {
44 goto done;
45 }
46 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
47 sm_l1_tag_ecc_uncorrected_err_count);
48 if (err != 0) {
49 goto done;
50 }
51
52 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
53 sm_cbu_ecc_corrected_err_count);
54 if (err != 0) {
55 goto done;
56 }
57 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
58 sm_cbu_ecc_uncorrected_err_count);
59 if (err != 0) {
60 goto done;
61 }
62
63 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
64 sm_l1_data_ecc_corrected_err_count);
65 if (err != 0) {
66 goto done;
67 }
68 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
69 sm_l1_data_ecc_uncorrected_err_count);
70 if (err != 0) {
71 goto done;
72 }
73
74 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
75 sm_icache_ecc_corrected_err_count);
76 if (err != 0) {
77 goto done;
78 }
79 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
80 sm_icache_ecc_uncorrected_err_count);
81 if (err != 0) {
82 goto done;
83 }
84
85 err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
86 gcc_l15_ecc_corrected_err_count);
87 if (err != 0) {
88 goto done;
89 }
90 err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
91 gcc_l15_ecc_uncorrected_err_count);
92 if (err != 0) {
93 goto done;
94 }
95
96 err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count);
97 if (err != 0) {
98 goto done;
99 }
100 err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count);
101 if (err != 0) {
102 goto done;
103 }
104
105 err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_uncorrected_err_count);
106 if (err != 0) {
107 goto done;
108 }
109 err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_corrected_err_count);
110 if (err != 0) {
111 goto done;
112 }
113
114 err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
115 gpccs_ecc_uncorrected_err_count);
116 if (err != 0) {
117 goto done;
118 }
119 err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
120 gpccs_ecc_corrected_err_count);
121 if (err != 0) {
122 goto done;
123 }
124
125 err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
126 mmu_l1tlb_ecc_uncorrected_err_count);
127 if (err != 0) {
128 goto done;
129 }
130 err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
131 mmu_l1tlb_ecc_corrected_err_count);
132 if (err != 0) {
133 goto done;
134 }
135
136 err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_uncorrected_err_count);
137 if (err != 0) {
138 goto done;
139 }
140 err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_corrected_err_count);
141 if (err != 0) {
142 goto done;
143 }
144
145 err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_uncorrected_err_count);
146 if (err != 0) {
147 goto done;
148 }
149 err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_corrected_err_count);
150 if (err != 0) {
151 goto done;
152 }
153
154 err = NVGPU_ECC_COUNTER_INIT_FB(
155 mmu_fillunit_ecc_uncorrected_err_count);
156 if (err != 0) {
157 goto done;
158 }
159 err = NVGPU_ECC_COUNTER_INIT_FB(
160 mmu_fillunit_ecc_corrected_err_count);
161 if (err != 0) {
162 goto done;
163 }
164
165 err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_uncorrected_err_count);
166 if (err != 0) {
167 goto done;
168 }
169 err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_corrected_err_count);
170 if (err != 0) {
171 goto done;
172 }
173
174done:
175 if (err != 0) {
176 nvgpu_err(g, "ecc counter allocate failed, err=%d", err);
177 nvgpu_ecc_free(g);
178 }
179
180 return err;
181}
diff --git a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
new file mode 100644
index 00000000..ce0f12b9
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
@@ -0,0 +1,28 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
#ifndef __ECC_GV11B_H__
#define __ECC_GV11B_H__

/*
 * Forward declaration so this header is self-contained: without it, the
 * struct tag in the prototype below would get function-prototype scope when
 * this header is included before the definition of struct gk20a, making the
 * prototype incompatible with the real function.
 */
struct gk20a;

/*
 * Initialise the gv11b ECC counters for @g.
 *
 * Returns 0 on success or a non-zero error code on failure.
 */
int gv11b_ecc_init(struct gk20a *g);

#endif
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index d3fe5f65..c2f47a20 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -198,7 +198,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
198 l1_tag_corrected_err_count_delta += 198 l1_tag_corrected_err_count_delta +=
199 (is_l1_tag_ecc_corrected_total_err_overflow << 199 (is_l1_tag_ecc_corrected_total_err_overflow <<
200 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s()); 200 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s());
201 g->ecc.gr.sm_l1_tag_corrected_err_count.counters[tpc] += 201 g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter +=
202 l1_tag_corrected_err_count_delta; 202 l1_tag_corrected_err_count_delta;
203 gk20a_writel(g, 203 gk20a_writel(g,
204 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, 204 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset,
@@ -213,7 +213,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
213 l1_tag_uncorrected_err_count_delta += 213 l1_tag_uncorrected_err_count_delta +=
214 (is_l1_tag_ecc_uncorrected_total_err_overflow << 214 (is_l1_tag_ecc_uncorrected_total_err_overflow <<
215 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s()); 215 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s());
216 g->ecc.gr.sm_l1_tag_uncorrected_err_count.counters[tpc] += 216 g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter +=
217 l1_tag_uncorrected_err_count_delta; 217 l1_tag_uncorrected_err_count_delta;
218 gk20a_writel(g, 218 gk20a_writel(g,
219 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, 219 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset,
@@ -290,7 +290,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
290 lrf_corrected_err_count_delta += 290 lrf_corrected_err_count_delta +=
291 (is_lrf_ecc_corrected_total_err_overflow << 291 (is_lrf_ecc_corrected_total_err_overflow <<
292 gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s()); 292 gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s());
293 g->ecc.gr.sm_lrf_single_err_count.counters[tpc] += 293 g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter +=
294 lrf_corrected_err_count_delta; 294 lrf_corrected_err_count_delta;
295 gk20a_writel(g, 295 gk20a_writel(g,
296 gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset, 296 gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset,
@@ -305,7 +305,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
305 lrf_uncorrected_err_count_delta += 305 lrf_uncorrected_err_count_delta +=
306 (is_lrf_ecc_uncorrected_total_err_overflow << 306 (is_lrf_ecc_uncorrected_total_err_overflow <<
307 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s()); 307 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s());
308 g->ecc.gr.sm_lrf_double_err_count.counters[tpc] += 308 g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
309 lrf_uncorrected_err_count_delta; 309 lrf_uncorrected_err_count_delta;
310 gk20a_writel(g, 310 gk20a_writel(g,
311 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, 311 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset,
@@ -449,7 +449,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
449 cbu_corrected_err_count_delta += 449 cbu_corrected_err_count_delta +=
450 (is_cbu_ecc_corrected_total_err_overflow << 450 (is_cbu_ecc_corrected_total_err_overflow <<
451 gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s()); 451 gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s());
452 g->ecc.gr.sm_cbu_corrected_err_count.counters[tpc] += 452 g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter +=
453 cbu_corrected_err_count_delta; 453 cbu_corrected_err_count_delta;
454 gk20a_writel(g, 454 gk20a_writel(g,
455 gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset, 455 gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset,
@@ -464,7 +464,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
464 cbu_uncorrected_err_count_delta += 464 cbu_uncorrected_err_count_delta +=
465 (is_cbu_ecc_uncorrected_total_err_overflow << 465 (is_cbu_ecc_uncorrected_total_err_overflow <<
466 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s()); 466 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s());
467 g->ecc.gr.sm_cbu_uncorrected_err_count.counters[tpc] += 467 g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter +=
468 cbu_uncorrected_err_count_delta; 468 cbu_uncorrected_err_count_delta;
469 gk20a_writel(g, 469 gk20a_writel(g,
470 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, 470 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset,
@@ -529,7 +529,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
529 l1_data_corrected_err_count_delta += 529 l1_data_corrected_err_count_delta +=
530 (is_l1_data_ecc_corrected_total_err_overflow << 530 (is_l1_data_ecc_corrected_total_err_overflow <<
531 gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s()); 531 gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s());
532 g->ecc.gr.sm_l1_data_corrected_err_count.counters[tpc] += 532 g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter +=
533 l1_data_corrected_err_count_delta; 533 l1_data_corrected_err_count_delta;
534 gk20a_writel(g, 534 gk20a_writel(g,
535 gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset, 535 gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset,
@@ -544,7 +544,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
544 l1_data_uncorrected_err_count_delta += 544 l1_data_uncorrected_err_count_delta +=
545 (is_l1_data_ecc_uncorrected_total_err_overflow << 545 (is_l1_data_ecc_uncorrected_total_err_overflow <<
546 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s()); 546 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s());
547 g->ecc.gr.sm_l1_data_uncorrected_err_count.counters[tpc] += 547 g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter +=
548 l1_data_uncorrected_err_count_delta; 548 l1_data_uncorrected_err_count_delta;
549 gk20a_writel(g, 549 gk20a_writel(g,
550 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, 550 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset,
@@ -613,7 +613,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
613 icache_corrected_err_count_delta += 613 icache_corrected_err_count_delta +=
614 (is_icache_ecc_corrected_total_err_overflow << 614 (is_icache_ecc_corrected_total_err_overflow <<
615 gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s()); 615 gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s());
616 g->ecc.gr.sm_icache_corrected_err_count.counters[tpc] += 616 g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter +=
617 icache_corrected_err_count_delta; 617 icache_corrected_err_count_delta;
618 gk20a_writel(g, 618 gk20a_writel(g,
619 gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset, 619 gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset,
@@ -628,7 +628,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
628 icache_uncorrected_err_count_delta += 628 icache_uncorrected_err_count_delta +=
629 (is_icache_ecc_uncorrected_total_err_overflow << 629 (is_icache_ecc_uncorrected_total_err_overflow <<
630 gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s()); 630 gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s());
631 g->ecc.gr.sm_icache_uncorrected_err_count.counters[tpc] += 631 g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter +=
632 icache_uncorrected_err_count_delta; 632 icache_uncorrected_err_count_delta;
633 gk20a_writel(g, 633 gk20a_writel(g,
634 gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset, 634 gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset,
@@ -717,7 +717,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
717 gcc_l15_corrected_err_count_delta += 717 gcc_l15_corrected_err_count_delta +=
718 (is_gcc_l15_ecc_corrected_total_err_overflow << 718 (is_gcc_l15_ecc_corrected_total_err_overflow <<
719 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s()); 719 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s());
720 g->ecc.gr.gcc_l15_corrected_err_count.counters[gpc] += 720 g->ecc.gr.gcc_l15_ecc_corrected_err_count[gpc].counter +=
721 gcc_l15_corrected_err_count_delta; 721 gcc_l15_corrected_err_count_delta;
722 gk20a_writel(g, 722 gk20a_writel(g,
723 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset, 723 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset,
@@ -732,7 +732,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
732 gcc_l15_uncorrected_err_count_delta += 732 gcc_l15_uncorrected_err_count_delta +=
733 (is_gcc_l15_ecc_uncorrected_total_err_overflow << 733 (is_gcc_l15_ecc_uncorrected_total_err_overflow <<
734 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s()); 734 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s());
735 g->ecc.gr.gcc_l15_uncorrected_err_count.counters[gpc] += 735 g->ecc.gr.gcc_l15_ecc_uncorrected_err_count[gpc].counter +=
736 gcc_l15_uncorrected_err_count_delta; 736 gcc_l15_uncorrected_err_count_delta;
737 gk20a_writel(g, 737 gk20a_writel(g,
738 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset, 738 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset,
@@ -802,9 +802,9 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc,
802 uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s()); 802 uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s());
803 803
804 804
805 g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc] += 805 g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter +=
806 corrected_delta; 806 corrected_delta;
807 g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc] += 807 g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter +=
808 uncorrected_delta; 808 uncorrected_delta;
809 nvgpu_log(g, gpu_dbg_intr, 809 nvgpu_log(g, gpu_dbg_intr,
810 "mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); 810 "mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
@@ -824,8 +824,8 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc,
824 "ecc error address: 0x%x", ecc_addr); 824 "ecc error address: 0x%x", ecc_addr);
825 nvgpu_log(g, gpu_dbg_intr, 825 nvgpu_log(g, gpu_dbg_intr,
826 "ecc error count corrected: %d, uncorrected %d", 826 "ecc error count corrected: %d, uncorrected %d",
827 g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc], 827 g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter,
828 g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc]); 828 g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter);
829 829
830 return ret; 830 return ret;
831} 831}
@@ -880,9 +880,9 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc,
880 gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset, 880 gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset,
881 gr_gpc0_gpccs_falcon_ecc_status_reset_task_f()); 881 gr_gpc0_gpccs_falcon_ecc_status_reset_task_f());
882 882
883 g->ecc.gr.gpccs_corrected_err_count.counters[gpc] += 883 g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter +=
884 corrected_delta; 884 corrected_delta;
885 g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc] += 885 g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter +=
886 uncorrected_delta; 886 uncorrected_delta;
887 nvgpu_log(g, gpu_dbg_intr, 887 nvgpu_log(g, gpu_dbg_intr,
888 "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); 888 "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
@@ -907,8 +907,8 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc,
907 907
908 nvgpu_log(g, gpu_dbg_intr, 908 nvgpu_log(g, gpu_dbg_intr,
909 "ecc error count corrected: %d, uncorrected %d", 909 "ecc error count corrected: %d, uncorrected %d",
910 g->ecc.gr.gpccs_corrected_err_count.counters[gpc], 910 g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter,
911 g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc]); 911 g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter);
912 912
913 return ret; 913 return ret;
914} 914}
@@ -2419,9 +2419,9 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
2419 gk20a_writel(g, gr_fecs_falcon_ecc_status_r(), 2419 gk20a_writel(g, gr_fecs_falcon_ecc_status_r(),
2420 gr_fecs_falcon_ecc_status_reset_task_f()); 2420 gr_fecs_falcon_ecc_status_reset_task_f());
2421 2421
2422 g->ecc.gr.fecs_corrected_err_count.counters[0] += 2422 g->ecc.gr.fecs_ecc_corrected_err_count[0].counter +=
2423 corrected_delta; 2423 corrected_delta;
2424 g->ecc.gr.fecs_uncorrected_err_count.counters[0] += 2424 g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter +=
2425 uncorrected_delta; 2425 uncorrected_delta;
2426 2426
2427 nvgpu_log(g, gpu_dbg_intr, 2427 nvgpu_log(g, gpu_dbg_intr,
@@ -2450,8 +2450,8 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
2450 2450
2451 nvgpu_log(g, gpu_dbg_intr, 2451 nvgpu_log(g, gpu_dbg_intr,
2452 "ecc error count corrected: %d, uncorrected %d", 2452 "ecc error count corrected: %d, uncorrected %d",
2453 g->ecc.gr.fecs_corrected_err_count.counters[0], 2453 g->ecc.gr.fecs_ecc_corrected_err_count[0].counter,
2454 g->ecc.gr.fecs_uncorrected_err_count.counters[0]); 2454 g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
2455 } 2455 }
2456} 2456}
2457 2457
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 366d6928..efac772c 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -84,6 +84,7 @@
84#include "regops_gv11b.h" 84#include "regops_gv11b.h"
85#include "subctx_gv11b.h" 85#include "subctx_gv11b.h"
86#include "therm_gv11b.h" 86#include "therm_gv11b.h"
87#include "ecc_gv11b.h"
87 88
88#include <nvgpu/ptimer.h> 89#include <nvgpu/ptimer.h>
89#include <nvgpu/debug.h> 90#include <nvgpu/debug.h>
@@ -369,10 +370,7 @@ static const struct gpu_ops gv11b_ops = {
369 .update_boosted_ctx = gr_gp10b_update_boosted_ctx, 370 .update_boosted_ctx = gr_gp10b_update_boosted_ctx,
370 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, 371 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
371 .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, 372 .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
372#ifdef CONFIG_SYSFS 373 .init_ecc = gv11b_ecc_init,
373 .create_gr_sysfs = gr_gv11b_create_sysfs,
374 .remove_gr_sysfs = gr_gv11b_remove_sysfs,
375#endif
376 .set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode, 374 .set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode,
377 .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, 375 .is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
378 .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table, 376 .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
diff --git a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c
index 48faa4d2..db797bde 100644
--- a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c
@@ -90,13 +90,11 @@ void gv11b_ltc_isr(struct gk20a *g)
90 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; 90 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
91 u32 corrected_delta, uncorrected_delta; 91 u32 corrected_delta, uncorrected_delta;
92 u32 corrected_overflow, uncorrected_overflow; 92 u32 corrected_overflow, uncorrected_overflow;
93 u32 ltc_corrected, ltc_uncorrected;
94 93
95 mc_intr = gk20a_readl(g, mc_intr_ltc_r()); 94 mc_intr = gk20a_readl(g, mc_intr_ltc_r());
96 for (ltc = 0; ltc < g->ltc_count; ltc++) { 95 for (ltc = 0; ltc < g->ltc_count; ltc++) {
97 if ((mc_intr & 1U << ltc) == 0) 96 if ((mc_intr & 1U << ltc) == 0)
98 continue; 97 continue;
99 ltc_corrected = ltc_uncorrected = 0U;
100 98
101 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { 99 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
102 u32 offset = ltc_stride * ltc + lts_stride * slice; 100 u32 offset = ltc_stride * ltc + lts_stride * slice;
@@ -150,8 +148,8 @@ void gv11b_ltc_isr(struct gk20a *g)
150 if (uncorrected_overflow) 148 if (uncorrected_overflow)
151 uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s()); 149 uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
152 150
153 ltc_corrected += corrected_delta; 151 g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta;
154 ltc_uncorrected += uncorrected_delta; 152 g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta;
155 nvgpu_log(g, gpu_dbg_intr, 153 nvgpu_log(g, gpu_dbg_intr,
156 "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3); 154 "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
157 155
@@ -177,10 +175,6 @@ void gv11b_ltc_isr(struct gk20a *g)
177 } 175 }
178 176
179 } 177 }
180 g->ecc.ltc.l2_cache_corrected_err_count.counters[ltc] +=
181 ltc_corrected;
182 g->ecc.ltc.l2_cache_uncorrected_err_count.counters[ltc] +=
183 ltc_uncorrected;
184 178
185 } 179 }
186 180
diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
index 3f0e2f22..9a2e9c00 100644
--- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
@@ -343,8 +343,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
343 if (uncorrected_overflow) 343 if (uncorrected_overflow)
344 uncorrected_delta += (0x1UL << pwr_pmu_falcon_ecc_uncorrected_err_count_total_s()); 344 uncorrected_delta += (0x1UL << pwr_pmu_falcon_ecc_uncorrected_err_count_total_s());
345 345
346 g->ecc.pmu.pmu_corrected_err_count.counters[0] += corrected_delta; 346 g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter += corrected_delta;
347 g->ecc.pmu.pmu_uncorrected_err_count.counters[0] += uncorrected_delta; 347 g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter += uncorrected_delta;
348 348
349 nvgpu_log(g, gpu_dbg_intr, 349 nvgpu_log(g, gpu_dbg_intr,
350 "pmu ecc interrupt intr1: 0x%x", intr1); 350 "pmu ecc interrupt intr1: 0x%x", intr1);
@@ -371,8 +371,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
371 371
372 nvgpu_log(g, gpu_dbg_intr, 372 nvgpu_log(g, gpu_dbg_intr,
373 "ecc error count corrected: %d, uncorrected %d", 373 "ecc error count corrected: %d, uncorrected %d",
374 g->ecc.pmu.pmu_corrected_err_count.counters[0], 374 g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter,
375 g->ecc.pmu.pmu_uncorrected_err_count.counters[0]); 375 g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter);
376 } 376 }
377 } 377 }
378} 378}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/ecc.h b/drivers/gpu/nvgpu/include/nvgpu/ecc.h
new file mode 100644
index 00000000..9b211ef7
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/ecc.h
@@ -0,0 +1,162 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#ifndef NVGPU_ECC_H
24#define NVGPU_ECC_H
25
26#include <nvgpu/types.h>
27#include <nvgpu/list.h>
28
29#define NVGPU_ECC_STAT_NAME_MAX_SIZE 100
30
31struct gk20a;
32
33struct nvgpu_ecc_stat {
34 char name[NVGPU_ECC_STAT_NAME_MAX_SIZE];
35 u32 counter;
36 struct nvgpu_list_node node;
37};
38
39static inline struct nvgpu_ecc_stat *nvgpu_ecc_stat_from_node(
40 struct nvgpu_list_node *node)
41{
42 return (struct nvgpu_ecc_stat *)(
43 (uintptr_t)node - offsetof(struct nvgpu_ecc_stat, node)
44 );
45}
46
47struct nvgpu_ecc {
48 struct {
49 /* stats per tpc */
50
51 struct nvgpu_ecc_stat **sm_lrf_ecc_single_err_count;
52 struct nvgpu_ecc_stat **sm_lrf_ecc_double_err_count;
53
54 struct nvgpu_ecc_stat **sm_shm_ecc_sec_count;
55 struct nvgpu_ecc_stat **sm_shm_ecc_sed_count;
56 struct nvgpu_ecc_stat **sm_shm_ecc_ded_count;
57
58 struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe0_count;
59 struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe0_count;
60 struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe0_count;
61 struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe0_count;
62 struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe1_count;
63 struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe1_count;
64 struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe1_count;
65 struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe1_count;
66
67 struct nvgpu_ecc_stat **sm_l1_tag_ecc_corrected_err_count;
68 struct nvgpu_ecc_stat **sm_l1_tag_ecc_uncorrected_err_count;
69 struct nvgpu_ecc_stat **sm_cbu_ecc_corrected_err_count;
70 struct nvgpu_ecc_stat **sm_cbu_ecc_uncorrected_err_count;
71 struct nvgpu_ecc_stat **sm_l1_data_ecc_corrected_err_count;
72 struct nvgpu_ecc_stat **sm_l1_data_ecc_uncorrected_err_count;
73 struct nvgpu_ecc_stat **sm_icache_ecc_corrected_err_count;
74 struct nvgpu_ecc_stat **sm_icache_ecc_uncorrected_err_count;
75
76 /* stats per gpc */
77
78 struct nvgpu_ecc_stat *gcc_l15_ecc_corrected_err_count;
79 struct nvgpu_ecc_stat *gcc_l15_ecc_uncorrected_err_count;
80
81 struct nvgpu_ecc_stat *gpccs_ecc_corrected_err_count;
82 struct nvgpu_ecc_stat *gpccs_ecc_uncorrected_err_count;
83 struct nvgpu_ecc_stat *mmu_l1tlb_ecc_corrected_err_count;
84 struct nvgpu_ecc_stat *mmu_l1tlb_ecc_uncorrected_err_count;
85
86 /* stats per device */
87 struct nvgpu_ecc_stat *fecs_ecc_corrected_err_count;
88 struct nvgpu_ecc_stat *fecs_ecc_uncorrected_err_count;
89 } gr;
90
91 struct {
92 /* stats per lts */
93 struct nvgpu_ecc_stat **ecc_sec_count;
94 struct nvgpu_ecc_stat **ecc_ded_count;
95 } ltc;
96
97 struct {
98 /* stats per device */
99 struct nvgpu_ecc_stat *mmu_l2tlb_ecc_corrected_err_count;
100 struct nvgpu_ecc_stat *mmu_l2tlb_ecc_uncorrected_err_count;
101 struct nvgpu_ecc_stat *mmu_hubtlb_ecc_corrected_err_count;
102 struct nvgpu_ecc_stat *mmu_hubtlb_ecc_uncorrected_err_count;
103 struct nvgpu_ecc_stat *mmu_fillunit_ecc_corrected_err_count;
104 struct nvgpu_ecc_stat *mmu_fillunit_ecc_uncorrected_err_count;
105 } fb;
106
107 struct {
108 /* stats per device */
109 struct nvgpu_ecc_stat *pmu_ecc_corrected_err_count;
110 struct nvgpu_ecc_stat *pmu_ecc_uncorrected_err_count;
111 } pmu;
112
113 struct {
114 /* stats per fbpa */
115 struct nvgpu_ecc_stat *fbpa_ecc_sec_err_count;
116 struct nvgpu_ecc_stat *fbpa_ecc_ded_err_count;
117 } fbpa;
118
119 struct nvgpu_list_node stats_list;
120 int stats_count;
121};
122
/*
 * Counter setup helpers.
 *
 * Each function receives the address of the matching pointer field inside
 * g->ecc plus a name string. The NVGPU_ECC_COUNTER_INIT_* wrappers derive
 * both from the field identifier: the field is looked up in the relevant
 * sub-struct (gr, fb, pmu, ltc, fbpa) and the name is the stringified
 * identifier. The wrappers expect a 'struct gk20a *g' in scope at the call
 * site.
 *
 * NOTE(review): the implementations are not part of this header; allocation
 * and list-registration details should be confirmed against common/ecc.c.
 */

/* Two-level table of counters (per GPC, per TPC) in g->ecc.gr. */
int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
		struct nvgpu_ecc_stat ***stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_PER_TPC(stat) \
	nvgpu_ecc_counter_init_per_tpc(g, &g->ecc.gr.stat, #stat)

/* Flat array of counters (per GPC) in g->ecc.gr. */
int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
		struct nvgpu_ecc_stat **stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \
	nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat)

/* Per-device counter in the gr, fb or pmu group. */
int nvgpu_ecc_counter_init(struct gk20a *g,
		struct nvgpu_ecc_stat **stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_GR(stat) \
	nvgpu_ecc_counter_init(g, &g->ecc.gr.stat, #stat)
#define NVGPU_ECC_COUNTER_INIT_FB(stat) \
	nvgpu_ecc_counter_init(g, &g->ecc.fb.stat, #stat)
#define NVGPU_ECC_COUNTER_INIT_PMU(stat) \
	nvgpu_ecc_counter_init(g, &g->ecc.pmu.stat, #stat)

/* Two-level table of counters (per LTC, per slice) in g->ecc.ltc. */
int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
		struct nvgpu_ecc_stat ***stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \
	nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat)

/* Flat array of counters (per FBPA) in g->ecc.fbpa. */
int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
		struct nvgpu_ecc_stat **stat, const char *name);
#define NVGPU_ECC_COUNTER_INIT_PER_FBPA(stat) \
	nvgpu_ecc_counter_init_per_fbpa(g, &g->ecc.fbpa.stat, #stat)

void nvgpu_ecc_free(struct gk20a *g);

int nvgpu_ecc_init_support(struct gk20a *g);
void nvgpu_ecc_remove_support(struct gk20a *g);

/* OS-specific layer: each OS port provides these two. */

int nvgpu_ecc_sysfs_init(struct gk20a *g);
void nvgpu_ecc_sysfs_remove(struct gk20a *g);
161
162#endif
diff --git a/drivers/gpu/nvgpu/os/linux/ecc_sysfs.c b/drivers/gpu/nvgpu/os/linux/ecc_sysfs.c
new file mode 100644
index 00000000..0962e247
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/ecc_sysfs.c
@@ -0,0 +1,80 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/ecc.h>
18
19#include "gk20a/gk20a.h"
20#include "os_linux.h"
21
22int nvgpu_ecc_sysfs_init(struct gk20a *g)
23{
24 struct device *dev = dev_from_gk20a(g);
25 struct nvgpu_ecc *ecc = &g->ecc;
26 struct dev_ext_attribute *attr;
27 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
28 struct nvgpu_ecc_stat *stat;
29 int i = 0, err;
30
31 attr = nvgpu_kzalloc(g, sizeof(*attr) * ecc->stats_count);
32 if (!attr)
33 return -ENOMEM;
34
35 nvgpu_list_for_each_entry(stat,
36 &ecc->stats_list, nvgpu_ecc_stat, node) {
37 if (i >= ecc->stats_count) {
38 err = -EINVAL;
39 nvgpu_err(g, "stats_list longer than stats_count %d",
40 ecc->stats_count);
41 break;
42 }
43 sysfs_attr_init(&attr[i].attr);
44 attr[i].attr.attr.name = stat->name;
45 attr[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
46 attr[i].var = &stat->counter;
47 attr[i].attr.show = device_show_int;
48 err = device_create_file(dev, &attr[i].attr);
49 if (err) {
50 nvgpu_err(g, "sysfs node create failed for %s\n",
51 stat->name);
52 break;
53 }
54 i++;
55 }
56
57 if (err) {
58 while (i-- > 0)
59 device_remove_file(dev, &attr[i].attr);
60 nvgpu_kfree(g, attr);
61 return err;
62 }
63
64 l->ecc_attrs = attr;
65
66 return 0;
67}
68
69void nvgpu_ecc_sysfs_remove(struct gk20a *g)
70{
71 struct device *dev = dev_from_gk20a(g);
72 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
73 struct nvgpu_ecc *ecc = &g->ecc;
74 int i;
75
76 for (i = 0; i < ecc->stats_count; i++)
77 device_remove_file(dev, &l->ecc_attrs[i].attr);
78 nvgpu_kfree(g, l->ecc_attrs);
79 l->ecc_attrs = NULL;
80}
diff --git a/drivers/gpu/nvgpu/os/linux/os_linux.h b/drivers/gpu/nvgpu/os/linux/os_linux.h
index 4dcce322..85d697bd 100644
--- a/drivers/gpu/nvgpu/os/linux/os_linux.h
+++ b/drivers/gpu/nvgpu/os/linux/os_linux.h
@@ -141,6 +141,7 @@ struct nvgpu_os_linux {
141 struct dentry *debugfs_dump_ctxsw_stats; 141 struct dentry *debugfs_dump_ctxsw_stats;
142#endif 142#endif
143 DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5); 143 DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5);
144 struct dev_ext_attribute *ecc_attrs;
144 145
145 struct gk20a_cde_app cde_app; 146 struct gk20a_cde_app cde_app;
146 147
diff --git a/drivers/gpu/nvgpu/os/linux/pci.c b/drivers/gpu/nvgpu/os/linux/pci.c
index 3493b105..41fb69a0 100644
--- a/drivers/gpu/nvgpu/os/linux/pci.c
+++ b/drivers/gpu/nvgpu/os/linux/pci.c
@@ -52,11 +52,6 @@ static int nvgpu_pci_tegra_probe(struct device *dev)
52 52
53static int nvgpu_pci_tegra_remove(struct device *dev) 53static int nvgpu_pci_tegra_remove(struct device *dev)
54{ 54{
55 struct gk20a *g = get_gk20a(dev);
56
57 if (g->ops.gr.remove_gr_sysfs)
58 g->ops.gr.remove_gr_sysfs(g);
59
60 return 0; 55 return 0;
61} 56}
62 57
diff --git a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c
deleted file mode 100644
index 2a6ace37..00000000
--- a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c
+++ /dev/null
@@ -1,269 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/hashtable.h>
18
19#include <nvgpu/kmem.h>
20#include <nvgpu/bug.h>
21#include <nvgpu/hashtable.h>
22
23#include "os_linux.h"
24
25#include "gk20a/gk20a.h"
26
27#include "platform_gk20a.h"
28#include "platform_gk20a_tegra.h"
29#include "platform_gp10b.h"
30#include "platform_gp10b_tegra.h"
31#include "platform_ecc_sysfs.h"
32
/*
 * Hash a NUL-terminated string to a 32-bit key: start from 0x811c9dc5 and,
 * for every character, multiply by 0x1000193 and then XOR the character in.
 */
static u32 gen_ecc_hash_key(char *str)
{
	u32 key = 0x811c9dc5;
	const char *p;

	for (p = str; *p != '\0'; p++) {
		key *= 0x1000193;
		key ^= (u32)(*p);
	}

	return key;
}
46
47static ssize_t ecc_stat_show(struct device *dev,
48 struct device_attribute *attr,
49 char *buf)
50{
51 const char *ecc_stat_full_name = attr->attr.name;
52 const char *ecc_stat_base_name;
53 unsigned int hw_unit;
54 unsigned int subunit;
55 struct gk20a_ecc_stat *ecc_stat;
56 u32 hash_key;
57 struct gk20a *g = get_gk20a(dev);
58 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
59
60 if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
61 &subunit) == 2) {
62 ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]);
63 hw_unit = g->gr.slices_per_ltc * hw_unit + subunit;
64 } else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) {
65 ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]);
66 } else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) {
67 ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]);
68 } else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) {
69 ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]);
70 } else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) {
71 ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]);
72 } else {
73 return snprintf(buf,
74 PAGE_SIZE,
75 "Error: Invalid ECC stat name!\n");
76 }
77
78 hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);
79
80 hash_for_each_possible(l->ecc_sysfs_stats_htable,
81 ecc_stat,
82 hash_node,
83 hash_key) {
84 if (hw_unit >= ecc_stat->count)
85 continue;
86 if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
87 return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stat->counters[hw_unit]);
88 }
89
90 return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
91}
92
93int nvgpu_gr_ecc_stat_create(struct device *dev,
94 int is_l2, char *ecc_stat_name,
95 struct gk20a_ecc_stat *ecc_stat)
96{
97 struct gk20a *g = get_gk20a(dev);
98 char *ltc_unit_name = "ltc";
99 char *gr_unit_name = "gpc0_tpc";
100 char *lts_unit_name = "lts";
101 int num_hw_units = 0;
102 int num_subunits = 0;
103
104 if (is_l2 == 1)
105 num_hw_units = g->ltc_count;
106 else if (is_l2 == 2) {
107 num_hw_units = g->ltc_count;
108 num_subunits = g->gr.slices_per_ltc;
109 } else
110 num_hw_units = g->gr.tpc_count;
111
112
113 return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits,
114 is_l2 ? ltc_unit_name : gr_unit_name,
115 num_subunits ? lts_unit_name: NULL,
116 ecc_stat_name,
117 ecc_stat);
118}
119
120int nvgpu_ecc_stat_create(struct device *dev,
121 int num_hw_units, int num_subunits,
122 char *ecc_unit_name, char *ecc_subunit_name,
123 char *ecc_stat_name,
124 struct gk20a_ecc_stat *ecc_stat)
125{
126 int error = 0;
127 struct gk20a *g = get_gk20a(dev);
128 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
129 int hw_unit = 0;
130 int subunit = 0;
131 int element = 0;
132 u32 hash_key = 0;
133 struct device_attribute *dev_attr_array;
134
135 int num_elements = num_subunits ? num_subunits * num_hw_units :
136 num_hw_units;
137
138 /* Allocate arrays */
139 dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) *
140 num_elements);
141 ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements);
142 ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements);
143
144 for (hw_unit = 0; hw_unit < num_elements; hw_unit++) {
145 ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) *
146 ECC_STAT_NAME_MAX_SIZE);
147 }
148 ecc_stat->count = num_elements;
149 if (num_subunits) {
150 for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
151 for (subunit = 0; subunit < num_subunits; subunit++) {
152 element = hw_unit*num_subunits + subunit;
153
154 snprintf(ecc_stat->names[element],
155 ECC_STAT_NAME_MAX_SIZE,
156 "%s%d_%s%d_%s",
157 ecc_unit_name,
158 hw_unit,
159 ecc_subunit_name,
160 subunit,
161 ecc_stat_name);
162
163 sysfs_attr_init(&dev_attr_array[element].attr);
164 dev_attr_array[element].attr.name =
165 ecc_stat->names[element];
166 dev_attr_array[element].attr.mode =
167 VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
168 dev_attr_array[element].show = ecc_stat_show;
169 dev_attr_array[element].store = NULL;
170
171 /* Create sysfs file */
172 error |= device_create_file(dev,
173 &dev_attr_array[element]);
174
175 }
176 }
177 } else {
178 for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
179
180 /* Fill in struct device_attribute members */
181 snprintf(ecc_stat->names[hw_unit],
182 ECC_STAT_NAME_MAX_SIZE,
183 "%s%d_%s",
184 ecc_unit_name,
185 hw_unit,
186 ecc_stat_name);
187
188 sysfs_attr_init(&dev_attr_array[hw_unit].attr);
189 dev_attr_array[hw_unit].attr.name =
190 ecc_stat->names[hw_unit];
191 dev_attr_array[hw_unit].attr.mode =
192 VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
193 dev_attr_array[hw_unit].show = ecc_stat_show;
194 dev_attr_array[hw_unit].store = NULL;
195
196 /* Create sysfs file */
197 error |= device_create_file(dev,
198 &dev_attr_array[hw_unit]);
199 }
200 }
201
202 /* Add hash table entry */
203 hash_key = gen_ecc_hash_key(ecc_stat_name);
204 hash_add(l->ecc_sysfs_stats_htable,
205 &ecc_stat->hash_node,
206 hash_key);
207
208 ecc_stat->attr_array = dev_attr_array;
209
210 return error;
211}
212
213void nvgpu_gr_ecc_stat_remove(struct device *dev,
214 int is_l2, struct gk20a_ecc_stat *ecc_stat)
215{
216 struct gk20a *g = get_gk20a(dev);
217 int num_hw_units = 0;
218 int num_subunits = 0;
219
220 if (is_l2 == 1)
221 num_hw_units = g->ltc_count;
222 else if (is_l2 == 2) {
223 num_hw_units = g->ltc_count;
224 num_subunits = g->gr.slices_per_ltc;
225 } else
226 num_hw_units = g->gr.tpc_count;
227
228 nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat);
229}
230
231void nvgpu_ecc_stat_remove(struct device *dev,
232 int num_hw_units, int num_subunits,
233 struct gk20a_ecc_stat *ecc_stat)
234{
235 struct gk20a *g = get_gk20a(dev);
236 struct device_attribute *dev_attr_array = ecc_stat->attr_array;
237 int hw_unit = 0;
238 int subunit = 0;
239 int element = 0;
240 int num_elements = num_subunits ? num_subunits * num_hw_units :
241 num_hw_units;
242
243 /* Remove sysfs files */
244 if (num_subunits) {
245 for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
246 for (subunit = 0; subunit < num_subunits; subunit++) {
247 element = hw_unit * num_subunits + subunit;
248
249 device_remove_file(dev,
250 &dev_attr_array[element]);
251 }
252 }
253 } else {
254 for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++)
255 device_remove_file(dev, &dev_attr_array[hw_unit]);
256 }
257
258 /* Remove hash table entry */
259 hash_del(&ecc_stat->hash_node);
260
261 /* Free arrays */
262 nvgpu_kfree(g, ecc_stat->counters);
263
264 for (hw_unit = 0; hw_unit < num_elements; hw_unit++)
265 nvgpu_kfree(g, ecc_stat->names[hw_unit]);
266
267 nvgpu_kfree(g, ecc_stat->names);
268 nvgpu_kfree(g, dev_attr_array);
269}
diff --git a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h
deleted file mode 100644
index d29f7bd3..00000000
--- a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h
+++ /dev/null
@@ -1,37 +0,0 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef _NVGPU_PLATFORM_SYSFS_H_
18#define _NVGPU_PLATFORM_SYSFS_H_
19
20#include "gp10b/gr_gp10b.h"
21
22#define ECC_STAT_NAME_MAX_SIZE 100
23
/*
 * Create the sysfs nodes for one GR/LTC ECC stat. is_l2 selects the layout:
 * 1 is per LTC, 2 is per LTC slice, anything else is per TPC.
 */
int nvgpu_gr_ecc_stat_create(struct device *dev,
			     int is_l2, char *ecc_stat_name,
			     struct gk20a_ecc_stat *ecc_stat);

/*
 * Create the sysfs nodes for one ECC stat with an explicit layout of
 * num_hw_units units, each with num_subunits sub-units (0 for none).
 */
int nvgpu_ecc_stat_create(struct device *dev,
			  int num_hw_units, int num_subunits,
			  char *ecc_unit_name, char *ecc_subunit_name,
			  char *ecc_stat_name,
			  struct gk20a_ecc_stat *ecc_stat);

/* Counterpart of nvgpu_gr_ecc_stat_create(); is_l2 has the same meaning. */
void nvgpu_gr_ecc_stat_remove(struct device *dev,
			      int is_l2, struct gk20a_ecc_stat *ecc_stat);

/* Counterpart of nvgpu_ecc_stat_create(); pass the same unit counts. */
void nvgpu_ecc_stat_remove(struct device *dev,
			   int num_hw_units, int num_subunits,
			   struct gk20a_ecc_stat *ecc_stat);
37#endif
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c
index d5530368..c5464d5b 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c
+++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c
@@ -41,7 +41,6 @@
41#include "gk20a/gk20a.h" 41#include "gk20a/gk20a.h"
42 42
43#include "platform_gk20a.h" 43#include "platform_gk20a.h"
44#include "platform_ecc_sysfs.h"
45#include "platform_gk20a_tegra.h" 44#include "platform_gk20a_tegra.h"
46#include "platform_gp10b.h" 45#include "platform_gp10b.h"
47#include "platform_gp10b_tegra.h" 46#include "platform_gp10b_tegra.h"
@@ -177,11 +176,6 @@ static int gp10b_tegra_late_probe(struct device *dev)
177 176
178static int gp10b_tegra_remove(struct device *dev) 177static int gp10b_tegra_remove(struct device *dev)
179{ 178{
180 struct gk20a *g = get_gk20a(dev);
181
182 if (g->ops.gr.remove_gr_sysfs)
183 g->ops.gr.remove_gr_sysfs(g);
184
185 /* deinitialise tegra specific scaling quirks */ 179 /* deinitialise tegra specific scaling quirks */
186 gp10b_tegra_scale_exit(dev); 180 gp10b_tegra_scale_exit(dev);
187 181
@@ -476,162 +470,3 @@ struct gk20a_platform gp10b_tegra_platform = {
476 470
477 .secure_buffer_size = 401408, 471 .secure_buffer_size = 401408,
478}; 472};
479
480void gr_gp10b_create_sysfs(struct gk20a *g)
481{
482 int error = 0;
483 struct device *dev = dev_from_gk20a(g);
484
485 /* This stat creation function is called on GR init. GR can get
486 initialized multiple times but we only need to create the ECC
487 stats once. Therefore, add the following check to avoid
488 creating duplicate stat sysfs nodes. */
489 if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL)
490 return;
491
492 error |= nvgpu_gr_ecc_stat_create(dev,
493 0,
494 "sm_lrf_ecc_single_err_count",
495 &g->ecc.gr.sm_lrf_single_err_count);
496
497 error |= nvgpu_gr_ecc_stat_create(dev,
498 0,
499 "sm_lrf_ecc_double_err_count",
500 &g->ecc.gr.sm_lrf_double_err_count);
501
502 error |= nvgpu_gr_ecc_stat_create(dev,
503 0,
504 "sm_shm_ecc_sec_count",
505 &g->ecc.gr.sm_shm_sec_count);
506
507 error |= nvgpu_gr_ecc_stat_create(dev,
508 0,
509 "sm_shm_ecc_sed_count",
510 &g->ecc.gr.sm_shm_sed_count);
511
512 error |= nvgpu_gr_ecc_stat_create(dev,
513 0,
514 "sm_shm_ecc_ded_count",
515 &g->ecc.gr.sm_shm_ded_count);
516
517 error |= nvgpu_gr_ecc_stat_create(dev,
518 0,
519 "tex_ecc_total_sec_pipe0_count",
520 &g->ecc.gr.tex_total_sec_pipe0_count);
521
522 error |= nvgpu_gr_ecc_stat_create(dev,
523 0,
524 "tex_ecc_total_ded_pipe0_count",
525 &g->ecc.gr.tex_total_ded_pipe0_count);
526
527 error |= nvgpu_gr_ecc_stat_create(dev,
528 0,
529 "tex_ecc_unique_sec_pipe0_count",
530 &g->ecc.gr.tex_unique_sec_pipe0_count);
531
532 error |= nvgpu_gr_ecc_stat_create(dev,
533 0,
534 "tex_ecc_unique_ded_pipe0_count",
535 &g->ecc.gr.tex_unique_ded_pipe0_count);
536
537 error |= nvgpu_gr_ecc_stat_create(dev,
538 0,
539 "tex_ecc_total_sec_pipe1_count",
540 &g->ecc.gr.tex_total_sec_pipe1_count);
541
542 error |= nvgpu_gr_ecc_stat_create(dev,
543 0,
544 "tex_ecc_total_ded_pipe1_count",
545 &g->ecc.gr.tex_total_ded_pipe1_count);
546
547 error |= nvgpu_gr_ecc_stat_create(dev,
548 0,
549 "tex_ecc_unique_sec_pipe1_count",
550 &g->ecc.gr.tex_unique_sec_pipe1_count);
551
552 error |= nvgpu_gr_ecc_stat_create(dev,
553 0,
554 "tex_ecc_unique_ded_pipe1_count",
555 &g->ecc.gr.tex_unique_ded_pipe1_count);
556
557 error |= nvgpu_gr_ecc_stat_create(dev,
558 2,
559 "ecc_sec_count",
560 &g->ecc.ltc.l2_sec_count);
561
562 error |= nvgpu_gr_ecc_stat_create(dev,
563 2,
564 "ecc_ded_count",
565 &g->ecc.ltc.l2_ded_count);
566
567 if (error)
568 dev_err(dev, "Failed to create sysfs attributes!\n");
569}
570
571void gr_gp10b_remove_sysfs(struct gk20a *g)
572{
573 struct device *dev = dev_from_gk20a(g);
574
575 if (!g->ecc.gr.sm_lrf_single_err_count.counters)
576 return;
577
578 nvgpu_gr_ecc_stat_remove(dev,
579 0,
580 &g->ecc.gr.sm_lrf_single_err_count);
581
582 nvgpu_gr_ecc_stat_remove(dev,
583 0,
584 &g->ecc.gr.sm_lrf_double_err_count);
585
586 nvgpu_gr_ecc_stat_remove(dev,
587 0,
588 &g->ecc.gr.sm_shm_sec_count);
589
590 nvgpu_gr_ecc_stat_remove(dev,
591 0,
592 &g->ecc.gr.sm_shm_sed_count);
593
594 nvgpu_gr_ecc_stat_remove(dev,
595 0,
596 &g->ecc.gr.sm_shm_ded_count);
597
598 nvgpu_gr_ecc_stat_remove(dev,
599 0,
600 &g->ecc.gr.tex_total_sec_pipe0_count);
601
602 nvgpu_gr_ecc_stat_remove(dev,
603 0,
604 &g->ecc.gr.tex_total_ded_pipe0_count);
605
606 nvgpu_gr_ecc_stat_remove(dev,
607 0,
608 &g->ecc.gr.tex_unique_sec_pipe0_count);
609
610 nvgpu_gr_ecc_stat_remove(dev,
611 0,
612 &g->ecc.gr.tex_unique_ded_pipe0_count);
613
614 nvgpu_gr_ecc_stat_remove(dev,
615 0,
616 &g->ecc.gr.tex_total_sec_pipe1_count);
617
618 nvgpu_gr_ecc_stat_remove(dev,
619 0,
620 &g->ecc.gr.tex_total_ded_pipe1_count);
621
622 nvgpu_gr_ecc_stat_remove(dev,
623 0,
624 &g->ecc.gr.tex_unique_sec_pipe1_count);
625
626 nvgpu_gr_ecc_stat_remove(dev,
627 0,
628 &g->ecc.gr.tex_unique_ded_pipe1_count);
629
630 nvgpu_gr_ecc_stat_remove(dev,
631 2,
632 &g->ecc.ltc.l2_sec_count);
633
634 nvgpu_gr_ecc_stat_remove(dev,
635 2,
636 &g->ecc.ltc.l2_ded_count);
637}
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h
index 6de90275..85b46b9a 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h
+++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h
@@ -18,6 +18,5 @@
18#define _PLATFORM_GP10B_TEGRA_H_ 18#define _PLATFORM_GP10B_TEGRA_H_
19 19
20#include "gp10b/gr_gp10b.h" 20#include "gp10b/gr_gp10b.h"
21#include "platform_ecc_sysfs.h"
22 21
23#endif 22#endif
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
index d62e7932..c9c13197 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
+++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
@@ -39,7 +39,6 @@
39 39
40#include "platform_gp10b.h" 40#include "platform_gp10b.h"
41#include "platform_gp10b_tegra.h" 41#include "platform_gp10b_tegra.h"
42#include "platform_ecc_sysfs.h"
43 42
44#include "os_linux.h" 43#include "os_linux.h"
45#include "platform_gk20a_tegra.h" 44#include "platform_gk20a_tegra.h"
@@ -94,11 +93,6 @@ static int gv11b_tegra_late_probe(struct device *dev)
94 93
95static int gv11b_tegra_remove(struct device *dev) 94static int gv11b_tegra_remove(struct device *dev)
96{ 95{
97 struct gk20a *g = get_gk20a(dev);
98
99 if (g->ops.gr.remove_gr_sysfs)
100 g->ops.gr.remove_gr_sysfs(g);
101
102 gv11b_tegra_scale_exit(dev); 96 gv11b_tegra_scale_exit(dev);
103 97
104#ifdef CONFIG_TEGRA_GK20A_NVHOST 98#ifdef CONFIG_TEGRA_GK20A_NVHOST
@@ -261,328 +255,3 @@ struct gk20a_platform gv11b_tegra_platform = {
261 255
262 .secure_buffer_size = 667648, 256 .secure_buffer_size = 667648,
263}; 257};
264
265void gr_gv11b_create_sysfs(struct gk20a *g)
266{
267 struct device *dev = dev_from_gk20a(g);
268 int error = 0;
269
270 /* This stat creation function is called on GR init. GR can get
271 initialized multiple times but we only need to create the ECC
272 stats once. Therefore, add the following check to avoid
273 creating duplicate stat sysfs nodes. */
274 if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL)
275 return;
276
277 gr_gp10b_create_sysfs(g);
278
279 error |= nvgpu_gr_ecc_stat_create(dev,
280 0,
281 "sm_l1_tag_ecc_corrected_err_count",
282 &g->ecc.gr.sm_l1_tag_corrected_err_count);
283
284 error |= nvgpu_gr_ecc_stat_create(dev,
285 0,
286 "sm_l1_tag_ecc_uncorrected_err_count",
287 &g->ecc.gr.sm_l1_tag_uncorrected_err_count);
288
289 error |= nvgpu_gr_ecc_stat_create(dev,
290 0,
291 "sm_cbu_ecc_corrected_err_count",
292 &g->ecc.gr.sm_cbu_corrected_err_count);
293
294 error |= nvgpu_gr_ecc_stat_create(dev,
295 0,
296 "sm_cbu_ecc_uncorrected_err_count",
297 &g->ecc.gr.sm_cbu_uncorrected_err_count);
298
299 error |= nvgpu_gr_ecc_stat_create(dev,
300 0,
301 "sm_l1_data_ecc_corrected_err_count",
302 &g->ecc.gr.sm_l1_data_corrected_err_count);
303
304 error |= nvgpu_gr_ecc_stat_create(dev,
305 0,
306 "sm_l1_data_ecc_uncorrected_err_count",
307 &g->ecc.gr.sm_l1_data_uncorrected_err_count);
308
309 error |= nvgpu_gr_ecc_stat_create(dev,
310 0,
311 "sm_icache_ecc_corrected_err_count",
312 &g->ecc.gr.sm_icache_corrected_err_count);
313
314 error |= nvgpu_gr_ecc_stat_create(dev,
315 0,
316 "sm_icache_ecc_uncorrected_err_count",
317 &g->ecc.gr.sm_icache_uncorrected_err_count);
318
319 error |= nvgpu_gr_ecc_stat_create(dev,
320 0,
321 "gcc_l15_ecc_corrected_err_count",
322 &g->ecc.gr.gcc_l15_corrected_err_count);
323
324 error |= nvgpu_gr_ecc_stat_create(dev,
325 0,
326 "gcc_l15_ecc_uncorrected_err_count",
327 &g->ecc.gr.gcc_l15_uncorrected_err_count);
328
329 error |= nvgpu_ecc_stat_create(dev,
330 g->ltc_count,
331 0,
332 "ltc",
333 NULL,
334 "l2_cache_uncorrected_err_count",
335 &g->ecc.ltc.l2_cache_uncorrected_err_count);
336
337 error |= nvgpu_ecc_stat_create(dev,
338 g->ltc_count,
339 0,
340 "ltc",
341 NULL,
342 "l2_cache_corrected_err_count",
343 &g->ecc.ltc.l2_cache_corrected_err_count);
344
345 error |= nvgpu_ecc_stat_create(dev,
346 1,
347 0,
348 "gpc",
349 NULL,
350 "fecs_ecc_uncorrected_err_count",
351 &g->ecc.gr.fecs_uncorrected_err_count);
352
353 error |= nvgpu_ecc_stat_create(dev,
354 1,
355 0,
356 "gpc",
357 NULL,
358 "fecs_ecc_corrected_err_count",
359 &g->ecc.gr.fecs_corrected_err_count);
360
361 error |= nvgpu_ecc_stat_create(dev,
362 g->gr.gpc_count,
363 0,
364 "gpc",
365 NULL,
366 "gpccs_ecc_uncorrected_err_count",
367 &g->ecc.gr.gpccs_uncorrected_err_count);
368
369 error |= nvgpu_ecc_stat_create(dev,
370 g->gr.gpc_count,
371 0,
372 "gpc",
373 NULL,
374 "gpccs_ecc_corrected_err_count",
375 &g->ecc.gr.gpccs_corrected_err_count);
376
377 error |= nvgpu_ecc_stat_create(dev,
378 g->gr.gpc_count,
379 0,
380 "gpc",
381 NULL,
382 "mmu_l1tlb_ecc_uncorrected_err_count",
383 &g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
384
385 error |= nvgpu_ecc_stat_create(dev,
386 g->gr.gpc_count,
387 0,
388 "gpc",
389 NULL,
390 "mmu_l1tlb_ecc_corrected_err_count",
391 &g->ecc.gr.mmu_l1tlb_corrected_err_count);
392
393 error |= nvgpu_ecc_stat_create(dev,
394 1,
395 0,
396 "eng",
397 NULL,
398 "mmu_l2tlb_ecc_uncorrected_err_count",
399 &g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
400
401 error |= nvgpu_ecc_stat_create(dev,
402 1,
403 0,
404 "eng",
405 NULL,
406 "mmu_l2tlb_ecc_corrected_err_count",
407 &g->ecc.fb.mmu_l2tlb_corrected_err_count);
408
409 error |= nvgpu_ecc_stat_create(dev,
410 1,
411 0,
412 "eng",
413 NULL,
414 "mmu_hubtlb_ecc_uncorrected_err_count",
415 &g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
416
417 error |= nvgpu_ecc_stat_create(dev,
418 1,
419 0,
420 "eng",
421 NULL,
422 "mmu_hubtlb_ecc_corrected_err_count",
423 &g->ecc.fb.mmu_hubtlb_corrected_err_count);
424
425 error |= nvgpu_ecc_stat_create(dev,
426 1,
427 0,
428 "eng",
429 NULL,
430 "mmu_fillunit_ecc_uncorrected_err_count",
431 &g->ecc.fb.mmu_fillunit_uncorrected_err_count);
432
433 error |= nvgpu_ecc_stat_create(dev,
434 1,
435 0,
436 "eng",
437 NULL,
438 "mmu_fillunit_ecc_corrected_err_count",
439 &g->ecc.fb.mmu_fillunit_corrected_err_count);
440
441 error |= nvgpu_ecc_stat_create(dev,
442 1,
443 0,
444 "eng",
445 NULL,
446 "pmu_ecc_uncorrected_err_count",
447 &g->ecc.pmu.pmu_uncorrected_err_count);
448
449 error |= nvgpu_ecc_stat_create(dev,
450 1,
451 0,
452 "eng",
453 NULL,
454 "pmu_ecc_corrected_err_count",
455 &g->ecc.pmu.pmu_corrected_err_count);
456
457 if (error)
458 dev_err(dev, "Failed to create gv11b sysfs attributes!\n");
459}
460
461void gr_gv11b_remove_sysfs(struct gk20a *g)
462{
463 struct device *dev = dev_from_gk20a(g);
464
465 if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters)
466 return;
467 gr_gp10b_remove_sysfs(g);
468
469 nvgpu_gr_ecc_stat_remove(dev,
470 0,
471 &g->ecc.gr.sm_l1_tag_corrected_err_count);
472
473 nvgpu_gr_ecc_stat_remove(dev,
474 0,
475 &g->ecc.gr.sm_l1_tag_uncorrected_err_count);
476
477 nvgpu_gr_ecc_stat_remove(dev,
478 0,
479 &g->ecc.gr.sm_cbu_corrected_err_count);
480
481 nvgpu_gr_ecc_stat_remove(dev,
482 0,
483 &g->ecc.gr.sm_cbu_uncorrected_err_count);
484
485 nvgpu_gr_ecc_stat_remove(dev,
486 0,
487 &g->ecc.gr.sm_l1_data_corrected_err_count);
488
489 nvgpu_gr_ecc_stat_remove(dev,
490 0,
491 &g->ecc.gr.sm_l1_data_uncorrected_err_count);
492
493 nvgpu_gr_ecc_stat_remove(dev,
494 0,
495 &g->ecc.gr.sm_icache_corrected_err_count);
496
497 nvgpu_gr_ecc_stat_remove(dev,
498 0,
499 &g->ecc.gr.sm_icache_uncorrected_err_count);
500
501 nvgpu_gr_ecc_stat_remove(dev,
502 0,
503 &g->ecc.gr.gcc_l15_corrected_err_count);
504
505 nvgpu_gr_ecc_stat_remove(dev,
506 0,
507 &g->ecc.gr.gcc_l15_uncorrected_err_count);
508
509 nvgpu_ecc_stat_remove(dev,
510 g->ltc_count,
511 0,
512 &g->ecc.ltc.l2_cache_uncorrected_err_count);
513
514 nvgpu_ecc_stat_remove(dev,
515 g->ltc_count,
516 0,
517 &g->ecc.ltc.l2_cache_corrected_err_count);
518
519 nvgpu_ecc_stat_remove(dev,
520 1,
521 0,
522 &g->ecc.gr.fecs_uncorrected_err_count);
523
524 nvgpu_ecc_stat_remove(dev,
525 1,
526 0,
527 &g->ecc.gr.fecs_corrected_err_count);
528
529 nvgpu_ecc_stat_remove(dev,
530 g->gr.gpc_count,
531 0,
532 &g->ecc.gr.gpccs_uncorrected_err_count);
533
534 nvgpu_ecc_stat_remove(dev,
535 g->gr.gpc_count,
536 0,
537 &g->ecc.gr.gpccs_corrected_err_count);
538
539 nvgpu_ecc_stat_remove(dev,
540 g->gr.gpc_count,
541 0,
542 &g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
543
544 nvgpu_ecc_stat_remove(dev,
545 g->gr.gpc_count,
546 0,
547 &g->ecc.gr.mmu_l1tlb_corrected_err_count);
548
549 nvgpu_ecc_stat_remove(dev,
550 1,
551 0,
552 &g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
553
554 nvgpu_ecc_stat_remove(dev,
555 1,
556 0,
557 &g->ecc.fb.mmu_l2tlb_corrected_err_count);
558
559 nvgpu_ecc_stat_remove(dev,
560 1,
561 0,
562 &g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
563
564 nvgpu_ecc_stat_remove(dev,
565 1,
566 0,
567 &g->ecc.fb.mmu_hubtlb_corrected_err_count);
568
569 nvgpu_ecc_stat_remove(dev,
570 1,
571 0,
572 &g->ecc.fb.mmu_fillunit_uncorrected_err_count);
573
574 nvgpu_ecc_stat_remove(dev,
575 1,
576 0,
577 &g->ecc.fb.mmu_fillunit_corrected_err_count);
578
579 nvgpu_ecc_stat_remove(dev,
580 1,
581 0,
582 &g->ecc.pmu.pmu_uncorrected_err_count);
583
584 nvgpu_ecc_stat_remove(dev,
585 1,
586 0,
587 &g->ecc.pmu.pmu_corrected_err_count);
588}
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
index 090ac7b4..fc0f9c84 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -215,10 +215,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
215 .update_boosted_ctx = NULL, 215 .update_boosted_ctx = NULL,
216 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, 216 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
217 .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, 217 .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
218#ifdef CONFIG_SYSFS
219 .create_gr_sysfs = gr_gp10b_create_sysfs,
220 .remove_gr_sysfs = gr_gp10b_remove_sysfs,
221#endif
222 .set_ctxsw_preemption_mode = 218 .set_ctxsw_preemption_mode =
223 vgpu_gr_gp10b_set_ctxsw_preemption_mode, 219 vgpu_gr_gp10b_set_ctxsw_preemption_mode,
224 .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, 220 .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
index 5da9fed5..dbd00c23 100644
--- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -232,10 +232,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
232 .update_boosted_ctx = NULL, 232 .update_boosted_ctx = NULL,
233 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, 233 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
234 .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, 234 .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
235#ifdef CONFIG_SYSFS
236 .create_gr_sysfs = gr_gv11b_create_sysfs,
237 .remove_gr_sysfs = gr_gv11b_remove_sysfs,
238#endif
239 .set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode, 235 .set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode,
240 .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, 236 .is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
241 .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table, 237 .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,