diff options
author | Richard Zhao <rizhao@nvidia.com> | 2018-06-26 20:37:40 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-07-19 19:43:58 -0400 |
commit | 7f14aafc2c02eb0fab458324d0ba91a7fdea3086 (patch) | |
tree | cda9f48839fbde3444fde521a9b0069eb06cd81a /drivers/gpu/nvgpu/gv11b | |
parent | 5ff1b3fe5a30c926e59a55ad25dd4daf430c8579 (diff) |
gpu: nvgpu: rework ecc structure and sysfs
- create common file common/ecc.c, which includes common functions for
adding and removing ecc counters.
- common code will create a list of all counters, which makes it easier to
iterate over all counters.
- Add chip specific file for adding ecc counters.
- add linux specific file os/linux/ecc_sysfs.c to export counters to
sysfs.
- remove obsolete code
- the MISRA violation for using snprintf is not solved; it is tracked in
jira NVGPU-859
Jira NVGPUT-115
Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1763536
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/ecc_gv11b.c | 181 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/ecc_gv11b.h | 28 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 48 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/ltc_gv11b.c | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/pmu_gv11b.c | 8 |
6 files changed, 241 insertions, 40 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.c b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.c new file mode 100644 index 00000000..6e29bf94 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.c | |||
@@ -0,0 +1,181 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <nvgpu/ecc.h> | ||
24 | |||
25 | #include "gk20a/gk20a.h" | ||
26 | #include "gv11b/ecc_gv11b.h" | ||
27 | |||
28 | int gv11b_ecc_init(struct gk20a *g) | ||
29 | { | ||
30 | int err; | ||
31 | |||
32 | err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count); | ||
33 | if (err != 0) { | ||
34 | goto done; | ||
35 | } | ||
36 | err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count); | ||
37 | if (err != 0) { | ||
38 | goto done; | ||
39 | } | ||
40 | |||
41 | err = NVGPU_ECC_COUNTER_INIT_PER_TPC( | ||
42 | sm_l1_tag_ecc_corrected_err_count); | ||
43 | if (err != 0) { | ||
44 | goto done; | ||
45 | } | ||
46 | err = NVGPU_ECC_COUNTER_INIT_PER_TPC( | ||
47 | sm_l1_tag_ecc_uncorrected_err_count); | ||
48 | if (err != 0) { | ||
49 | goto done; | ||
50 | } | ||
51 | |||
52 | err = NVGPU_ECC_COUNTER_INIT_PER_TPC( | ||
53 | sm_cbu_ecc_corrected_err_count); | ||
54 | if (err != 0) { | ||
55 | goto done; | ||
56 | } | ||
57 | err = NVGPU_ECC_COUNTER_INIT_PER_TPC( | ||
58 | sm_cbu_ecc_uncorrected_err_count); | ||
59 | if (err != 0) { | ||
60 | goto done; | ||
61 | } | ||
62 | |||
63 | err = NVGPU_ECC_COUNTER_INIT_PER_TPC( | ||
64 | sm_l1_data_ecc_corrected_err_count); | ||
65 | if (err != 0) { | ||
66 | goto done; | ||
67 | } | ||
68 | err = NVGPU_ECC_COUNTER_INIT_PER_TPC( | ||
69 | sm_l1_data_ecc_uncorrected_err_count); | ||
70 | if (err != 0) { | ||
71 | goto done; | ||
72 | } | ||
73 | |||
74 | err = NVGPU_ECC_COUNTER_INIT_PER_TPC( | ||
75 | sm_icache_ecc_corrected_err_count); | ||
76 | if (err != 0) { | ||
77 | goto done; | ||
78 | } | ||
79 | err = NVGPU_ECC_COUNTER_INIT_PER_TPC( | ||
80 | sm_icache_ecc_uncorrected_err_count); | ||
81 | if (err != 0) { | ||
82 | goto done; | ||
83 | } | ||
84 | |||
85 | err = NVGPU_ECC_COUNTER_INIT_PER_GPC( | ||
86 | gcc_l15_ecc_corrected_err_count); | ||
87 | if (err != 0) { | ||
88 | goto done; | ||
89 | } | ||
90 | err = NVGPU_ECC_COUNTER_INIT_PER_GPC( | ||
91 | gcc_l15_ecc_uncorrected_err_count); | ||
92 | if (err != 0) { | ||
93 | goto done; | ||
94 | } | ||
95 | |||
96 | err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count); | ||
97 | if (err != 0) { | ||
98 | goto done; | ||
99 | } | ||
100 | err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count); | ||
101 | if (err != 0) { | ||
102 | goto done; | ||
103 | } | ||
104 | |||
105 | err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_uncorrected_err_count); | ||
106 | if (err != 0) { | ||
107 | goto done; | ||
108 | } | ||
109 | err = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_corrected_err_count); | ||
110 | if (err != 0) { | ||
111 | goto done; | ||
112 | } | ||
113 | |||
114 | err = NVGPU_ECC_COUNTER_INIT_PER_GPC( | ||
115 | gpccs_ecc_uncorrected_err_count); | ||
116 | if (err != 0) { | ||
117 | goto done; | ||
118 | } | ||
119 | err = NVGPU_ECC_COUNTER_INIT_PER_GPC( | ||
120 | gpccs_ecc_corrected_err_count); | ||
121 | if (err != 0) { | ||
122 | goto done; | ||
123 | } | ||
124 | |||
125 | err = NVGPU_ECC_COUNTER_INIT_PER_GPC( | ||
126 | mmu_l1tlb_ecc_uncorrected_err_count); | ||
127 | if (err != 0) { | ||
128 | goto done; | ||
129 | } | ||
130 | err = NVGPU_ECC_COUNTER_INIT_PER_GPC( | ||
131 | mmu_l1tlb_ecc_corrected_err_count); | ||
132 | if (err != 0) { | ||
133 | goto done; | ||
134 | } | ||
135 | |||
136 | err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_uncorrected_err_count); | ||
137 | if (err != 0) { | ||
138 | goto done; | ||
139 | } | ||
140 | err = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_corrected_err_count); | ||
141 | if (err != 0) { | ||
142 | goto done; | ||
143 | } | ||
144 | |||
145 | err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_uncorrected_err_count); | ||
146 | if (err != 0) { | ||
147 | goto done; | ||
148 | } | ||
149 | err = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_corrected_err_count); | ||
150 | if (err != 0) { | ||
151 | goto done; | ||
152 | } | ||
153 | |||
154 | err = NVGPU_ECC_COUNTER_INIT_FB( | ||
155 | mmu_fillunit_ecc_uncorrected_err_count); | ||
156 | if (err != 0) { | ||
157 | goto done; | ||
158 | } | ||
159 | err = NVGPU_ECC_COUNTER_INIT_FB( | ||
160 | mmu_fillunit_ecc_corrected_err_count); | ||
161 | if (err != 0) { | ||
162 | goto done; | ||
163 | } | ||
164 | |||
165 | err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_uncorrected_err_count); | ||
166 | if (err != 0) { | ||
167 | goto done; | ||
168 | } | ||
169 | err = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_corrected_err_count); | ||
170 | if (err != 0) { | ||
171 | goto done; | ||
172 | } | ||
173 | |||
174 | done: | ||
175 | if (err != 0) { | ||
176 | nvgpu_err(g, "ecc counter allocate failed, err=%d", err); | ||
177 | nvgpu_ecc_free(g); | ||
178 | } | ||
179 | |||
180 | return err; | ||
181 | } | ||
diff --git a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h new file mode 100644 index 00000000..ce0f12b9 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h | |||
@@ -0,0 +1,28 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
#ifndef NVGPU_ECC_GV11B_H
#define NVGPU_ECC_GV11B_H

/*
 * Forward declaration so this header can be included on its own: without it,
 * the tag in the prototype below would only have function-prototype scope
 * whenever no earlier header had already declared struct gk20a.
 */
struct gk20a;

/*
 * Initialise the gv11b ECC error counters for GPU instance @g.
 *
 * Returns 0 on success, or a non-zero error code if any counter could not be
 * initialised.
 */
int gv11b_ecc_init(struct gk20a *g);

#endif /* NVGPU_ECC_GV11B_H */
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index d3fe5f65..c2f47a20 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -198,7 +198,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
198 | l1_tag_corrected_err_count_delta += | 198 | l1_tag_corrected_err_count_delta += |
199 | (is_l1_tag_ecc_corrected_total_err_overflow << | 199 | (is_l1_tag_ecc_corrected_total_err_overflow << |
200 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s()); | 200 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s()); |
201 | g->ecc.gr.sm_l1_tag_corrected_err_count.counters[tpc] += | 201 | g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter += |
202 | l1_tag_corrected_err_count_delta; | 202 | l1_tag_corrected_err_count_delta; |
203 | gk20a_writel(g, | 203 | gk20a_writel(g, |
204 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, | 204 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, |
@@ -213,7 +213,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
213 | l1_tag_uncorrected_err_count_delta += | 213 | l1_tag_uncorrected_err_count_delta += |
214 | (is_l1_tag_ecc_uncorrected_total_err_overflow << | 214 | (is_l1_tag_ecc_uncorrected_total_err_overflow << |
215 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s()); | 215 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s()); |
216 | g->ecc.gr.sm_l1_tag_uncorrected_err_count.counters[tpc] += | 216 | g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter += |
217 | l1_tag_uncorrected_err_count_delta; | 217 | l1_tag_uncorrected_err_count_delta; |
218 | gk20a_writel(g, | 218 | gk20a_writel(g, |
219 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, | 219 | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, |
@@ -290,7 +290,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
290 | lrf_corrected_err_count_delta += | 290 | lrf_corrected_err_count_delta += |
291 | (is_lrf_ecc_corrected_total_err_overflow << | 291 | (is_lrf_ecc_corrected_total_err_overflow << |
292 | gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s()); | 292 | gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s()); |
293 | g->ecc.gr.sm_lrf_single_err_count.counters[tpc] += | 293 | g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter += |
294 | lrf_corrected_err_count_delta; | 294 | lrf_corrected_err_count_delta; |
295 | gk20a_writel(g, | 295 | gk20a_writel(g, |
296 | gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset, | 296 | gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset, |
@@ -305,7 +305,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
305 | lrf_uncorrected_err_count_delta += | 305 | lrf_uncorrected_err_count_delta += |
306 | (is_lrf_ecc_uncorrected_total_err_overflow << | 306 | (is_lrf_ecc_uncorrected_total_err_overflow << |
307 | gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s()); | 307 | gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s()); |
308 | g->ecc.gr.sm_lrf_double_err_count.counters[tpc] += | 308 | g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter += |
309 | lrf_uncorrected_err_count_delta; | 309 | lrf_uncorrected_err_count_delta; |
310 | gk20a_writel(g, | 310 | gk20a_writel(g, |
311 | gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, | 311 | gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, |
@@ -449,7 +449,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
449 | cbu_corrected_err_count_delta += | 449 | cbu_corrected_err_count_delta += |
450 | (is_cbu_ecc_corrected_total_err_overflow << | 450 | (is_cbu_ecc_corrected_total_err_overflow << |
451 | gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s()); | 451 | gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s()); |
452 | g->ecc.gr.sm_cbu_corrected_err_count.counters[tpc] += | 452 | g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter += |
453 | cbu_corrected_err_count_delta; | 453 | cbu_corrected_err_count_delta; |
454 | gk20a_writel(g, | 454 | gk20a_writel(g, |
455 | gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset, | 455 | gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset, |
@@ -464,7 +464,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
464 | cbu_uncorrected_err_count_delta += | 464 | cbu_uncorrected_err_count_delta += |
465 | (is_cbu_ecc_uncorrected_total_err_overflow << | 465 | (is_cbu_ecc_uncorrected_total_err_overflow << |
466 | gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s()); | 466 | gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s()); |
467 | g->ecc.gr.sm_cbu_uncorrected_err_count.counters[tpc] += | 467 | g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter += |
468 | cbu_uncorrected_err_count_delta; | 468 | cbu_uncorrected_err_count_delta; |
469 | gk20a_writel(g, | 469 | gk20a_writel(g, |
470 | gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, | 470 | gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, |
@@ -529,7 +529,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
529 | l1_data_corrected_err_count_delta += | 529 | l1_data_corrected_err_count_delta += |
530 | (is_l1_data_ecc_corrected_total_err_overflow << | 530 | (is_l1_data_ecc_corrected_total_err_overflow << |
531 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s()); | 531 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s()); |
532 | g->ecc.gr.sm_l1_data_corrected_err_count.counters[tpc] += | 532 | g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter += |
533 | l1_data_corrected_err_count_delta; | 533 | l1_data_corrected_err_count_delta; |
534 | gk20a_writel(g, | 534 | gk20a_writel(g, |
535 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset, | 535 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset, |
@@ -544,7 +544,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
544 | l1_data_uncorrected_err_count_delta += | 544 | l1_data_uncorrected_err_count_delta += |
545 | (is_l1_data_ecc_uncorrected_total_err_overflow << | 545 | (is_l1_data_ecc_uncorrected_total_err_overflow << |
546 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s()); | 546 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s()); |
547 | g->ecc.gr.sm_l1_data_uncorrected_err_count.counters[tpc] += | 547 | g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter += |
548 | l1_data_uncorrected_err_count_delta; | 548 | l1_data_uncorrected_err_count_delta; |
549 | gk20a_writel(g, | 549 | gk20a_writel(g, |
550 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, | 550 | gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, |
@@ -613,7 +613,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
613 | icache_corrected_err_count_delta += | 613 | icache_corrected_err_count_delta += |
614 | (is_icache_ecc_corrected_total_err_overflow << | 614 | (is_icache_ecc_corrected_total_err_overflow << |
615 | gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s()); | 615 | gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s()); |
616 | g->ecc.gr.sm_icache_corrected_err_count.counters[tpc] += | 616 | g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter += |
617 | icache_corrected_err_count_delta; | 617 | icache_corrected_err_count_delta; |
618 | gk20a_writel(g, | 618 | gk20a_writel(g, |
619 | gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset, | 619 | gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset, |
@@ -628,7 +628,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
628 | icache_uncorrected_err_count_delta += | 628 | icache_uncorrected_err_count_delta += |
629 | (is_icache_ecc_uncorrected_total_err_overflow << | 629 | (is_icache_ecc_uncorrected_total_err_overflow << |
630 | gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s()); | 630 | gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s()); |
631 | g->ecc.gr.sm_icache_uncorrected_err_count.counters[tpc] += | 631 | g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter += |
632 | icache_uncorrected_err_count_delta; | 632 | icache_uncorrected_err_count_delta; |
633 | gk20a_writel(g, | 633 | gk20a_writel(g, |
634 | gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset, | 634 | gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset, |
@@ -717,7 +717,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
717 | gcc_l15_corrected_err_count_delta += | 717 | gcc_l15_corrected_err_count_delta += |
718 | (is_gcc_l15_ecc_corrected_total_err_overflow << | 718 | (is_gcc_l15_ecc_corrected_total_err_overflow << |
719 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s()); | 719 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s()); |
720 | g->ecc.gr.gcc_l15_corrected_err_count.counters[gpc] += | 720 | g->ecc.gr.gcc_l15_ecc_corrected_err_count[gpc].counter += |
721 | gcc_l15_corrected_err_count_delta; | 721 | gcc_l15_corrected_err_count_delta; |
722 | gk20a_writel(g, | 722 | gk20a_writel(g, |
723 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset, | 723 | gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset, |
@@ -732,7 +732,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
732 | gcc_l15_uncorrected_err_count_delta += | 732 | gcc_l15_uncorrected_err_count_delta += |
733 | (is_gcc_l15_ecc_uncorrected_total_err_overflow << | 733 | (is_gcc_l15_ecc_uncorrected_total_err_overflow << |
734 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s()); | 734 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s()); |
735 | g->ecc.gr.gcc_l15_uncorrected_err_count.counters[gpc] += | 735 | g->ecc.gr.gcc_l15_ecc_uncorrected_err_count[gpc].counter += |
736 | gcc_l15_uncorrected_err_count_delta; | 736 | gcc_l15_uncorrected_err_count_delta; |
737 | gk20a_writel(g, | 737 | gk20a_writel(g, |
738 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset, | 738 | gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset, |
@@ -802,9 +802,9 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc, | |||
802 | uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s()); | 802 | uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s()); |
803 | 803 | ||
804 | 804 | ||
805 | g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc] += | 805 | g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter += |
806 | corrected_delta; | 806 | corrected_delta; |
807 | g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc] += | 807 | g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter += |
808 | uncorrected_delta; | 808 | uncorrected_delta; |
809 | nvgpu_log(g, gpu_dbg_intr, | 809 | nvgpu_log(g, gpu_dbg_intr, |
810 | "mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); | 810 | "mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); |
@@ -824,8 +824,8 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc, | |||
824 | "ecc error address: 0x%x", ecc_addr); | 824 | "ecc error address: 0x%x", ecc_addr); |
825 | nvgpu_log(g, gpu_dbg_intr, | 825 | nvgpu_log(g, gpu_dbg_intr, |
826 | "ecc error count corrected: %d, uncorrected %d", | 826 | "ecc error count corrected: %d, uncorrected %d", |
827 | g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc], | 827 | g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter, |
828 | g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc]); | 828 | g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter); |
829 | 829 | ||
830 | return ret; | 830 | return ret; |
831 | } | 831 | } |
@@ -880,9 +880,9 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, | |||
880 | gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset, | 880 | gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset, |
881 | gr_gpc0_gpccs_falcon_ecc_status_reset_task_f()); | 881 | gr_gpc0_gpccs_falcon_ecc_status_reset_task_f()); |
882 | 882 | ||
883 | g->ecc.gr.gpccs_corrected_err_count.counters[gpc] += | 883 | g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter += |
884 | corrected_delta; | 884 | corrected_delta; |
885 | g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc] += | 885 | g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter += |
886 | uncorrected_delta; | 886 | uncorrected_delta; |
887 | nvgpu_log(g, gpu_dbg_intr, | 887 | nvgpu_log(g, gpu_dbg_intr, |
888 | "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); | 888 | "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); |
@@ -907,8 +907,8 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, | |||
907 | 907 | ||
908 | nvgpu_log(g, gpu_dbg_intr, | 908 | nvgpu_log(g, gpu_dbg_intr, |
909 | "ecc error count corrected: %d, uncorrected %d", | 909 | "ecc error count corrected: %d, uncorrected %d", |
910 | g->ecc.gr.gpccs_corrected_err_count.counters[gpc], | 910 | g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter, |
911 | g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc]); | 911 | g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter); |
912 | 912 | ||
913 | return ret; | 913 | return ret; |
914 | } | 914 | } |
@@ -2419,9 +2419,9 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) | |||
2419 | gk20a_writel(g, gr_fecs_falcon_ecc_status_r(), | 2419 | gk20a_writel(g, gr_fecs_falcon_ecc_status_r(), |
2420 | gr_fecs_falcon_ecc_status_reset_task_f()); | 2420 | gr_fecs_falcon_ecc_status_reset_task_f()); |
2421 | 2421 | ||
2422 | g->ecc.gr.fecs_corrected_err_count.counters[0] += | 2422 | g->ecc.gr.fecs_ecc_corrected_err_count[0].counter += |
2423 | corrected_delta; | 2423 | corrected_delta; |
2424 | g->ecc.gr.fecs_uncorrected_err_count.counters[0] += | 2424 | g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter += |
2425 | uncorrected_delta; | 2425 | uncorrected_delta; |
2426 | 2426 | ||
2427 | nvgpu_log(g, gpu_dbg_intr, | 2427 | nvgpu_log(g, gpu_dbg_intr, |
@@ -2450,8 +2450,8 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) | |||
2450 | 2450 | ||
2451 | nvgpu_log(g, gpu_dbg_intr, | 2451 | nvgpu_log(g, gpu_dbg_intr, |
2452 | "ecc error count corrected: %d, uncorrected %d", | 2452 | "ecc error count corrected: %d, uncorrected %d", |
2453 | g->ecc.gr.fecs_corrected_err_count.counters[0], | 2453 | g->ecc.gr.fecs_ecc_corrected_err_count[0].counter, |
2454 | g->ecc.gr.fecs_uncorrected_err_count.counters[0]); | 2454 | g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter); |
2455 | } | 2455 | } |
2456 | } | 2456 | } |
2457 | 2457 | ||
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 366d6928..efac772c 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c | |||
@@ -84,6 +84,7 @@ | |||
84 | #include "regops_gv11b.h" | 84 | #include "regops_gv11b.h" |
85 | #include "subctx_gv11b.h" | 85 | #include "subctx_gv11b.h" |
86 | #include "therm_gv11b.h" | 86 | #include "therm_gv11b.h" |
87 | #include "ecc_gv11b.h" | ||
87 | 88 | ||
88 | #include <nvgpu/ptimer.h> | 89 | #include <nvgpu/ptimer.h> |
89 | #include <nvgpu/debug.h> | 90 | #include <nvgpu/debug.h> |
@@ -369,10 +370,7 @@ static const struct gpu_ops gv11b_ops = { | |||
369 | .update_boosted_ctx = gr_gp10b_update_boosted_ctx, | 370 | .update_boosted_ctx = gr_gp10b_update_boosted_ctx, |
370 | .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, | 371 | .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, |
371 | .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, | 372 | .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, |
372 | #ifdef CONFIG_SYSFS | 373 | .init_ecc = gv11b_ecc_init, |
373 | .create_gr_sysfs = gr_gv11b_create_sysfs, | ||
374 | .remove_gr_sysfs = gr_gv11b_remove_sysfs, | ||
375 | #endif | ||
376 | .set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode, | 374 | .set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode, |
377 | .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, | 375 | .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, |
378 | .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table, | 376 | .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table, |
diff --git a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c index 48faa4d2..db797bde 100644 --- a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c | |||
@@ -90,13 +90,11 @@ void gv11b_ltc_isr(struct gk20a *g) | |||
90 | u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; | 90 | u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; |
91 | u32 corrected_delta, uncorrected_delta; | 91 | u32 corrected_delta, uncorrected_delta; |
92 | u32 corrected_overflow, uncorrected_overflow; | 92 | u32 corrected_overflow, uncorrected_overflow; |
93 | u32 ltc_corrected, ltc_uncorrected; | ||
94 | 93 | ||
95 | mc_intr = gk20a_readl(g, mc_intr_ltc_r()); | 94 | mc_intr = gk20a_readl(g, mc_intr_ltc_r()); |
96 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | 95 | for (ltc = 0; ltc < g->ltc_count; ltc++) { |
97 | if ((mc_intr & 1U << ltc) == 0) | 96 | if ((mc_intr & 1U << ltc) == 0) |
98 | continue; | 97 | continue; |
99 | ltc_corrected = ltc_uncorrected = 0U; | ||
100 | 98 | ||
101 | for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { | 99 | for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { |
102 | u32 offset = ltc_stride * ltc + lts_stride * slice; | 100 | u32 offset = ltc_stride * ltc + lts_stride * slice; |
@@ -150,8 +148,8 @@ void gv11b_ltc_isr(struct gk20a *g) | |||
150 | if (uncorrected_overflow) | 148 | if (uncorrected_overflow) |
151 | uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s()); | 149 | uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s()); |
152 | 150 | ||
153 | ltc_corrected += corrected_delta; | 151 | g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta; |
154 | ltc_uncorrected += uncorrected_delta; | 152 | g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta; |
155 | nvgpu_log(g, gpu_dbg_intr, | 153 | nvgpu_log(g, gpu_dbg_intr, |
156 | "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3); | 154 | "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3); |
157 | 155 | ||
@@ -177,10 +175,6 @@ void gv11b_ltc_isr(struct gk20a *g) | |||
177 | } | 175 | } |
178 | 176 | ||
179 | } | 177 | } |
180 | g->ecc.ltc.l2_cache_corrected_err_count.counters[ltc] += | ||
181 | ltc_corrected; | ||
182 | g->ecc.ltc.l2_cache_uncorrected_err_count.counters[ltc] += | ||
183 | ltc_uncorrected; | ||
184 | 178 | ||
185 | } | 179 | } |
186 | 180 | ||
diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c index 3f0e2f22..9a2e9c00 100644 --- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c | |||
@@ -343,8 +343,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0) | |||
343 | if (uncorrected_overflow) | 343 | if (uncorrected_overflow) |
344 | uncorrected_delta += (0x1UL << pwr_pmu_falcon_ecc_uncorrected_err_count_total_s()); | 344 | uncorrected_delta += (0x1UL << pwr_pmu_falcon_ecc_uncorrected_err_count_total_s()); |
345 | 345 | ||
346 | g->ecc.pmu.pmu_corrected_err_count.counters[0] += corrected_delta; | 346 | g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter += corrected_delta; |
347 | g->ecc.pmu.pmu_uncorrected_err_count.counters[0] += uncorrected_delta; | 347 | g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter += uncorrected_delta; |
348 | 348 | ||
349 | nvgpu_log(g, gpu_dbg_intr, | 349 | nvgpu_log(g, gpu_dbg_intr, |
350 | "pmu ecc interrupt intr1: 0x%x", intr1); | 350 | "pmu ecc interrupt intr1: 0x%x", intr1); |
@@ -371,8 +371,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0) | |||
371 | 371 | ||
372 | nvgpu_log(g, gpu_dbg_intr, | 372 | nvgpu_log(g, gpu_dbg_intr, |
373 | "ecc error count corrected: %d, uncorrected %d", | 373 | "ecc error count corrected: %d, uncorrected %d", |
374 | g->ecc.pmu.pmu_corrected_err_count.counters[0], | 374 | g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter, |
375 | g->ecc.pmu.pmu_uncorrected_err_count.counters[0]); | 375 | g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter); |
376 | } | 376 | } |
377 | } | 377 | } |
378 | } | 378 | } |