summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b
diff options
context:
space:
mode:
authorRichard Zhao <rizhao@nvidia.com>2018-06-26 20:37:40 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-07-19 19:43:58 -0400
commit7f14aafc2c02eb0fab458324d0ba91a7fdea3086 (patch)
treecda9f48839fbde3444fde521a9b0069eb06cd81a /drivers/gpu/nvgpu/gv11b
parent5ff1b3fe5a30c926e59a55ad25dd4daf430c8579 (diff)
gpu: nvgpu: rework ecc structure and sysfs
- Create common file common/ecc.c, which includes common functions for adding and removing ecc counters. - Common code creates a list of all counters, which makes it easier to iterate over all counters. - Add a chip-specific file for adding ecc counters. - Add Linux-specific file os/linux/ecc_sysfs.c to export counters to sysfs. - Remove obsolete code. - The MISRA violation for using snprintf is not solved; it is tracked with Jira NVGPU-859. Jira NVGPUT-115 Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4 Signed-off-by: Richard Zhao <rizhao@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1763536 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r--drivers/gpu/nvgpu/gv11b/ecc_gv11b.c181
-rw-r--r--drivers/gpu/nvgpu/gv11b/ecc_gv11b.h28
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c48
-rw-r--r--drivers/gpu/nvgpu/gv11b/hal_gv11b.c6
-rw-r--r--drivers/gpu/nvgpu/gv11b/ltc_gv11b.c10
-rw-r--r--drivers/gpu/nvgpu/gv11b/pmu_gv11b.c8
6 files changed, 241 insertions, 40 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.c b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.c
new file mode 100644
index 00000000..6e29bf94
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.c
@@ -0,0 +1,181 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/ecc.h>
24
25#include "gk20a/gk20a.h"
26#include "gv11b/ecc_gv11b.h"
27
28int gv11b_ecc_init(struct gk20a *g)
29{
30 int err;
31
32 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count);
33 if (err != 0) {
34 goto done;
35 }
36 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count);
37 if (err != 0) {
38 goto done;
39 }
40
41 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
42 sm_l1_tag_ecc_corrected_err_count);
43 if (err != 0) {
44 goto done;
45 }
46 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
47 sm_l1_tag_ecc_uncorrected_err_count);
48 if (err != 0) {
49 goto done;
50 }
51
52 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
53 sm_cbu_ecc_corrected_err_count);
54 if (err != 0) {
55 goto done;
56 }
57 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
58 sm_cbu_ecc_uncorrected_err_count);
59 if (err != 0) {
60 goto done;
61 }
62
63 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
64 sm_l1_data_ecc_corrected_err_count);
65 if (err != 0) {
66 goto done;
67 }
68 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
69 sm_l1_data_ecc_uncorrected_err_count);
70 if (err != 0) {
71 goto done;
72 }
73
74 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
75 sm_icache_ecc_corrected_err_count);
76 if (err != 0) {
77 goto done;
78 }
79 err = NVGPU_ECC_COUNTER_INIT_PER_TPC(
80 sm_icache_ecc_uncorrected_err_count);
81 if (err != 0) {
82 goto done;
83 }
84
85 err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
86 gcc_l15_ecc_corrected_err_count);
87 if (err != 0) {
88 goto done;
89 }
90 err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
91 gcc_l15_ecc_uncorrected_err_count);
92 if (err != 0) {
93 goto done;
94 }
95
96 err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count);
97 if (err != 0) {
98 goto done;
99 }
100 err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count);
101 if (err != 0) {
102 goto done;
103 }
104
105 err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_uncorrected_err_count);
106 if (err != 0) {
107 goto done;
108 }
109 err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_corrected_err_count);
110 if (err != 0) {
111 goto done;
112 }
113
114 err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
115 gpccs_ecc_uncorrected_err_count);
116 if (err != 0) {
117 goto done;
118 }
119 err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
120 gpccs_ecc_corrected_err_count);
121 if (err != 0) {
122 goto done;
123 }
124
125 err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
126 mmu_l1tlb_ecc_uncorrected_err_count);
127 if (err != 0) {
128 goto done;
129 }
130 err = NVGPU_ECC_COUNTER_INIT_PER_GPC(
131 mmu_l1tlb_ecc_corrected_err_count);
132 if (err != 0) {
133 goto done;
134 }
135
136 err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_uncorrected_err_count);
137 if (err != 0) {
138 goto done;
139 }
140 err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_corrected_err_count);
141 if (err != 0) {
142 goto done;
143 }
144
145 err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_uncorrected_err_count);
146 if (err != 0) {
147 goto done;
148 }
149 err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_corrected_err_count);
150 if (err != 0) {
151 goto done;
152 }
153
154 err = NVGPU_ECC_COUNTER_INIT_FB(
155 mmu_fillunit_ecc_uncorrected_err_count);
156 if (err != 0) {
157 goto done;
158 }
159 err = NVGPU_ECC_COUNTER_INIT_FB(
160 mmu_fillunit_ecc_corrected_err_count);
161 if (err != 0) {
162 goto done;
163 }
164
165 err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_uncorrected_err_count);
166 if (err != 0) {
167 goto done;
168 }
169 err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_corrected_err_count);
170 if (err != 0) {
171 goto done;
172 }
173
174done:
175 if (err != 0) {
176 nvgpu_err(g, "ecc counter allocate failed, err=%d", err);
177 nvgpu_ecc_free(g);
178 }
179
180 return err;
181}
diff --git a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
new file mode 100644
index 00000000..ce0f12b9
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
@@ -0,0 +1,28 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
#ifndef __ECC_GV11B_H__
#define __ECC_GV11B_H__

/*
 * Forward declaration so that this header can be included on its own.
 * Without it, `struct gk20a` in the prototype below would be declared at
 * function-prototype scope whenever no earlier header has introduced the
 * tag, making the parameter type incompatible with the real structure.
 */
struct gk20a;

/*
 * Initialise the gv11b ECC error counters for device `g`.
 *
 * Returns 0 on success or a non-zero error code if any counter could not
 * be set up.
 */
int gv11b_ecc_init(struct gk20a *g);

#endif /* __ECC_GV11B_H__ */
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index d3fe5f65..c2f47a20 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -198,7 +198,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
198 l1_tag_corrected_err_count_delta += 198 l1_tag_corrected_err_count_delta +=
199 (is_l1_tag_ecc_corrected_total_err_overflow << 199 (is_l1_tag_ecc_corrected_total_err_overflow <<
200 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s()); 200 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s());
201 g->ecc.gr.sm_l1_tag_corrected_err_count.counters[tpc] += 201 g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter +=
202 l1_tag_corrected_err_count_delta; 202 l1_tag_corrected_err_count_delta;
203 gk20a_writel(g, 203 gk20a_writel(g,
204 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, 204 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset,
@@ -213,7 +213,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
213 l1_tag_uncorrected_err_count_delta += 213 l1_tag_uncorrected_err_count_delta +=
214 (is_l1_tag_ecc_uncorrected_total_err_overflow << 214 (is_l1_tag_ecc_uncorrected_total_err_overflow <<
215 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s()); 215 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s());
216 g->ecc.gr.sm_l1_tag_uncorrected_err_count.counters[tpc] += 216 g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter +=
217 l1_tag_uncorrected_err_count_delta; 217 l1_tag_uncorrected_err_count_delta;
218 gk20a_writel(g, 218 gk20a_writel(g,
219 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, 219 gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset,
@@ -290,7 +290,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
290 lrf_corrected_err_count_delta += 290 lrf_corrected_err_count_delta +=
291 (is_lrf_ecc_corrected_total_err_overflow << 291 (is_lrf_ecc_corrected_total_err_overflow <<
292 gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s()); 292 gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s());
293 g->ecc.gr.sm_lrf_single_err_count.counters[tpc] += 293 g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter +=
294 lrf_corrected_err_count_delta; 294 lrf_corrected_err_count_delta;
295 gk20a_writel(g, 295 gk20a_writel(g,
296 gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset, 296 gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset,
@@ -305,7 +305,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
305 lrf_uncorrected_err_count_delta += 305 lrf_uncorrected_err_count_delta +=
306 (is_lrf_ecc_uncorrected_total_err_overflow << 306 (is_lrf_ecc_uncorrected_total_err_overflow <<
307 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s()); 307 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s());
308 g->ecc.gr.sm_lrf_double_err_count.counters[tpc] += 308 g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
309 lrf_uncorrected_err_count_delta; 309 lrf_uncorrected_err_count_delta;
310 gk20a_writel(g, 310 gk20a_writel(g,
311 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, 311 gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset,
@@ -449,7 +449,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
449 cbu_corrected_err_count_delta += 449 cbu_corrected_err_count_delta +=
450 (is_cbu_ecc_corrected_total_err_overflow << 450 (is_cbu_ecc_corrected_total_err_overflow <<
451 gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s()); 451 gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s());
452 g->ecc.gr.sm_cbu_corrected_err_count.counters[tpc] += 452 g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter +=
453 cbu_corrected_err_count_delta; 453 cbu_corrected_err_count_delta;
454 gk20a_writel(g, 454 gk20a_writel(g,
455 gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset, 455 gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset,
@@ -464,7 +464,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
464 cbu_uncorrected_err_count_delta += 464 cbu_uncorrected_err_count_delta +=
465 (is_cbu_ecc_uncorrected_total_err_overflow << 465 (is_cbu_ecc_uncorrected_total_err_overflow <<
466 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s()); 466 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s());
467 g->ecc.gr.sm_cbu_uncorrected_err_count.counters[tpc] += 467 g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter +=
468 cbu_uncorrected_err_count_delta; 468 cbu_uncorrected_err_count_delta;
469 gk20a_writel(g, 469 gk20a_writel(g,
470 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, 470 gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset,
@@ -529,7 +529,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
529 l1_data_corrected_err_count_delta += 529 l1_data_corrected_err_count_delta +=
530 (is_l1_data_ecc_corrected_total_err_overflow << 530 (is_l1_data_ecc_corrected_total_err_overflow <<
531 gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s()); 531 gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s());
532 g->ecc.gr.sm_l1_data_corrected_err_count.counters[tpc] += 532 g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter +=
533 l1_data_corrected_err_count_delta; 533 l1_data_corrected_err_count_delta;
534 gk20a_writel(g, 534 gk20a_writel(g,
535 gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset, 535 gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset,
@@ -544,7 +544,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
544 l1_data_uncorrected_err_count_delta += 544 l1_data_uncorrected_err_count_delta +=
545 (is_l1_data_ecc_uncorrected_total_err_overflow << 545 (is_l1_data_ecc_uncorrected_total_err_overflow <<
546 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s()); 546 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s());
547 g->ecc.gr.sm_l1_data_uncorrected_err_count.counters[tpc] += 547 g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter +=
548 l1_data_uncorrected_err_count_delta; 548 l1_data_uncorrected_err_count_delta;
549 gk20a_writel(g, 549 gk20a_writel(g,
550 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, 550 gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset,
@@ -613,7 +613,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
613 icache_corrected_err_count_delta += 613 icache_corrected_err_count_delta +=
614 (is_icache_ecc_corrected_total_err_overflow << 614 (is_icache_ecc_corrected_total_err_overflow <<
615 gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s()); 615 gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s());
616 g->ecc.gr.sm_icache_corrected_err_count.counters[tpc] += 616 g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter +=
617 icache_corrected_err_count_delta; 617 icache_corrected_err_count_delta;
618 gk20a_writel(g, 618 gk20a_writel(g,
619 gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset, 619 gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset,
@@ -628,7 +628,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
628 icache_uncorrected_err_count_delta += 628 icache_uncorrected_err_count_delta +=
629 (is_icache_ecc_uncorrected_total_err_overflow << 629 (is_icache_ecc_uncorrected_total_err_overflow <<
630 gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s()); 630 gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s());
631 g->ecc.gr.sm_icache_uncorrected_err_count.counters[tpc] += 631 g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter +=
632 icache_uncorrected_err_count_delta; 632 icache_uncorrected_err_count_delta;
633 gk20a_writel(g, 633 gk20a_writel(g,
634 gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset, 634 gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset,
@@ -717,7 +717,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
717 gcc_l15_corrected_err_count_delta += 717 gcc_l15_corrected_err_count_delta +=
718 (is_gcc_l15_ecc_corrected_total_err_overflow << 718 (is_gcc_l15_ecc_corrected_total_err_overflow <<
719 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s()); 719 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s());
720 g->ecc.gr.gcc_l15_corrected_err_count.counters[gpc] += 720 g->ecc.gr.gcc_l15_ecc_corrected_err_count[gpc].counter +=
721 gcc_l15_corrected_err_count_delta; 721 gcc_l15_corrected_err_count_delta;
722 gk20a_writel(g, 722 gk20a_writel(g,
723 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset, 723 gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset,
@@ -732,7 +732,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc,
732 gcc_l15_uncorrected_err_count_delta += 732 gcc_l15_uncorrected_err_count_delta +=
733 (is_gcc_l15_ecc_uncorrected_total_err_overflow << 733 (is_gcc_l15_ecc_uncorrected_total_err_overflow <<
734 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s()); 734 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s());
735 g->ecc.gr.gcc_l15_uncorrected_err_count.counters[gpc] += 735 g->ecc.gr.gcc_l15_ecc_uncorrected_err_count[gpc].counter +=
736 gcc_l15_uncorrected_err_count_delta; 736 gcc_l15_uncorrected_err_count_delta;
737 gk20a_writel(g, 737 gk20a_writel(g,
738 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset, 738 gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset,
@@ -802,9 +802,9 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc,
802 uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s()); 802 uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s());
803 803
804 804
805 g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc] += 805 g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter +=
806 corrected_delta; 806 corrected_delta;
807 g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc] += 807 g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter +=
808 uncorrected_delta; 808 uncorrected_delta;
809 nvgpu_log(g, gpu_dbg_intr, 809 nvgpu_log(g, gpu_dbg_intr,
810 "mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); 810 "mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
@@ -824,8 +824,8 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc,
824 "ecc error address: 0x%x", ecc_addr); 824 "ecc error address: 0x%x", ecc_addr);
825 nvgpu_log(g, gpu_dbg_intr, 825 nvgpu_log(g, gpu_dbg_intr,
826 "ecc error count corrected: %d, uncorrected %d", 826 "ecc error count corrected: %d, uncorrected %d",
827 g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc], 827 g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter,
828 g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc]); 828 g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter);
829 829
830 return ret; 830 return ret;
831} 831}
@@ -880,9 +880,9 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc,
880 gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset, 880 gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset,
881 gr_gpc0_gpccs_falcon_ecc_status_reset_task_f()); 881 gr_gpc0_gpccs_falcon_ecc_status_reset_task_f());
882 882
883 g->ecc.gr.gpccs_corrected_err_count.counters[gpc] += 883 g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter +=
884 corrected_delta; 884 corrected_delta;
885 g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc] += 885 g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter +=
886 uncorrected_delta; 886 uncorrected_delta;
887 nvgpu_log(g, gpu_dbg_intr, 887 nvgpu_log(g, gpu_dbg_intr,
888 "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); 888 "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
@@ -907,8 +907,8 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc,
907 907
908 nvgpu_log(g, gpu_dbg_intr, 908 nvgpu_log(g, gpu_dbg_intr,
909 "ecc error count corrected: %d, uncorrected %d", 909 "ecc error count corrected: %d, uncorrected %d",
910 g->ecc.gr.gpccs_corrected_err_count.counters[gpc], 910 g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter,
911 g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc]); 911 g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter);
912 912
913 return ret; 913 return ret;
914} 914}
@@ -2419,9 +2419,9 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
2419 gk20a_writel(g, gr_fecs_falcon_ecc_status_r(), 2419 gk20a_writel(g, gr_fecs_falcon_ecc_status_r(),
2420 gr_fecs_falcon_ecc_status_reset_task_f()); 2420 gr_fecs_falcon_ecc_status_reset_task_f());
2421 2421
2422 g->ecc.gr.fecs_corrected_err_count.counters[0] += 2422 g->ecc.gr.fecs_ecc_corrected_err_count[0].counter +=
2423 corrected_delta; 2423 corrected_delta;
2424 g->ecc.gr.fecs_uncorrected_err_count.counters[0] += 2424 g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter +=
2425 uncorrected_delta; 2425 uncorrected_delta;
2426 2426
2427 nvgpu_log(g, gpu_dbg_intr, 2427 nvgpu_log(g, gpu_dbg_intr,
@@ -2450,8 +2450,8 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr)
2450 2450
2451 nvgpu_log(g, gpu_dbg_intr, 2451 nvgpu_log(g, gpu_dbg_intr,
2452 "ecc error count corrected: %d, uncorrected %d", 2452 "ecc error count corrected: %d, uncorrected %d",
2453 g->ecc.gr.fecs_corrected_err_count.counters[0], 2453 g->ecc.gr.fecs_ecc_corrected_err_count[0].counter,
2454 g->ecc.gr.fecs_uncorrected_err_count.counters[0]); 2454 g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
2455 } 2455 }
2456} 2456}
2457 2457
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 366d6928..efac772c 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -84,6 +84,7 @@
84#include "regops_gv11b.h" 84#include "regops_gv11b.h"
85#include "subctx_gv11b.h" 85#include "subctx_gv11b.h"
86#include "therm_gv11b.h" 86#include "therm_gv11b.h"
87#include "ecc_gv11b.h"
87 88
88#include <nvgpu/ptimer.h> 89#include <nvgpu/ptimer.h>
89#include <nvgpu/debug.h> 90#include <nvgpu/debug.h>
@@ -369,10 +370,7 @@ static const struct gpu_ops gv11b_ops = {
369 .update_boosted_ctx = gr_gp10b_update_boosted_ctx, 370 .update_boosted_ctx = gr_gp10b_update_boosted_ctx,
370 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, 371 .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
371 .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, 372 .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
372#ifdef CONFIG_SYSFS 373 .init_ecc = gv11b_ecc_init,
373 .create_gr_sysfs = gr_gv11b_create_sysfs,
374 .remove_gr_sysfs = gr_gv11b_remove_sysfs,
375#endif
376 .set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode, 374 .set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode,
377 .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, 375 .is_etpc_addr = gv11b_gr_pri_is_etpc_addr,
378 .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table, 376 .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table,
diff --git a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c
index 48faa4d2..db797bde 100644
--- a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c
@@ -90,13 +90,11 @@ void gv11b_ltc_isr(struct gk20a *g)
90 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; 90 u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt;
91 u32 corrected_delta, uncorrected_delta; 91 u32 corrected_delta, uncorrected_delta;
92 u32 corrected_overflow, uncorrected_overflow; 92 u32 corrected_overflow, uncorrected_overflow;
93 u32 ltc_corrected, ltc_uncorrected;
94 93
95 mc_intr = gk20a_readl(g, mc_intr_ltc_r()); 94 mc_intr = gk20a_readl(g, mc_intr_ltc_r());
96 for (ltc = 0; ltc < g->ltc_count; ltc++) { 95 for (ltc = 0; ltc < g->ltc_count; ltc++) {
97 if ((mc_intr & 1U << ltc) == 0) 96 if ((mc_intr & 1U << ltc) == 0)
98 continue; 97 continue;
99 ltc_corrected = ltc_uncorrected = 0U;
100 98
101 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { 99 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
102 u32 offset = ltc_stride * ltc + lts_stride * slice; 100 u32 offset = ltc_stride * ltc + lts_stride * slice;
@@ -150,8 +148,8 @@ void gv11b_ltc_isr(struct gk20a *g)
150 if (uncorrected_overflow) 148 if (uncorrected_overflow)
151 uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s()); 149 uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s());
152 150
153 ltc_corrected += corrected_delta; 151 g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta;
154 ltc_uncorrected += uncorrected_delta; 152 g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta;
155 nvgpu_log(g, gpu_dbg_intr, 153 nvgpu_log(g, gpu_dbg_intr,
156 "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3); 154 "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3);
157 155
@@ -177,10 +175,6 @@ void gv11b_ltc_isr(struct gk20a *g)
177 } 175 }
178 176
179 } 177 }
180 g->ecc.ltc.l2_cache_corrected_err_count.counters[ltc] +=
181 ltc_corrected;
182 g->ecc.ltc.l2_cache_uncorrected_err_count.counters[ltc] +=
183 ltc_uncorrected;
184 178
185 } 179 }
186 180
diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
index 3f0e2f22..9a2e9c00 100644
--- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
@@ -343,8 +343,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
343 if (uncorrected_overflow) 343 if (uncorrected_overflow)
344 uncorrected_delta += (0x1UL << pwr_pmu_falcon_ecc_uncorrected_err_count_total_s()); 344 uncorrected_delta += (0x1UL << pwr_pmu_falcon_ecc_uncorrected_err_count_total_s());
345 345
346 g->ecc.pmu.pmu_corrected_err_count.counters[0] += corrected_delta; 346 g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter += corrected_delta;
347 g->ecc.pmu.pmu_uncorrected_err_count.counters[0] += uncorrected_delta; 347 g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter += uncorrected_delta;
348 348
349 nvgpu_log(g, gpu_dbg_intr, 349 nvgpu_log(g, gpu_dbg_intr,
350 "pmu ecc interrupt intr1: 0x%x", intr1); 350 "pmu ecc interrupt intr1: 0x%x", intr1);
@@ -371,8 +371,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
371 371
372 nvgpu_log(g, gpu_dbg_intr, 372 nvgpu_log(g, gpu_dbg_intr,
373 "ecc error count corrected: %d, uncorrected %d", 373 "ecc error count corrected: %d, uncorrected %d",
374 g->ecc.pmu.pmu_corrected_err_count.counters[0], 374 g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter,
375 g->ecc.pmu.pmu_uncorrected_err_count.counters[0]); 375 g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter);
376 } 376 }
377 } 377 }
378} 378}