aboutsummaryrefslogtreecommitdiffstats
path: root/include/nvgpu/ecc.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/nvgpu/ecc.h')
-rw-r--r--include/nvgpu/ecc.h162
1 files changed, 162 insertions, 0 deletions
diff --git a/include/nvgpu/ecc.h b/include/nvgpu/ecc.h
new file mode 100644
index 0000000..9b211ef
--- /dev/null
+++ b/include/nvgpu/ecc.h
@@ -0,0 +1,162 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#ifndef NVGPU_ECC_H
24#define NVGPU_ECC_H
25
26#include <nvgpu/types.h>
27#include <nvgpu/list.h>
28
/* Size, in chars, of the name buffer embedded in struct nvgpu_ecc_stat. */
29#define NVGPU_ECC_STAT_NAME_MAX_SIZE 100
30
/*
 * Forward declaration: the prototypes in this header only take pointers to
 * struct gk20a. The NVGPU_ECC_COUNTER_INIT_* macros dereference it, so the
 * full definition must be visible wherever those macros are expanded.
 */
31struct gk20a;
32
/*
 * A single named ECC counter.
 *
 * The list node is embedded in the struct so that a counter can be linked
 * into an nvgpu list and recovered again from its node with
 * nvgpu_ecc_stat_from_node().
 */
33struct nvgpu_ecc_stat {
34 char name[NVGPU_ECC_STAT_NAME_MAX_SIZE]; /* counter name, fixed-size buffer */
35 u32 counter; /* 32-bit counter value */
36 struct nvgpu_list_node node; /* embedded list linkage */
37};
38
39static inline struct nvgpu_ecc_stat *nvgpu_ecc_stat_from_node(
40 struct nvgpu_list_node *node)
41{
42 return (struct nvgpu_ecc_stat *)(
43 (uintptr_t)node - offsetof(struct nvgpu_ecc_stat, node)
44 );
45}
46
/*
 * Container for the ECC error counters.
 *
 * Counters are grouped into one anonymous struct per unit (gr, ltc, fb, pmu,
 * fbpa). Every member is a pointer (or pointer-to-pointer) to
 * struct nvgpu_ecc_stat; nothing is stored inline, so the counters themselves
 * live elsewhere. The member names are also used as the 'stat' argument of
 * the NVGPU_ECC_COUNTER_INIT_* macros, which stringify them.
 *
 * NOTE(review): the "sec"/"ded" and "corrected"/"uncorrected" suffixes
 * presumably distinguish corrected from detected-but-uncorrected errors;
 * confirm against the hardware documentation.
 */
47struct nvgpu_ecc {
48 struct {
49 /* stats per tpc */
	/*
	 * Double pointers; these match the 'struct nvgpu_ecc_stat ***'
	 * parameter of nvgpu_ecc_counter_init_per_tpc().
	 * Presumably indexed [gpc][tpc] -- TODO confirm.
	 */
50
51 struct nvgpu_ecc_stat **sm_lrf_ecc_single_err_count;
52 struct nvgpu_ecc_stat **sm_lrf_ecc_double_err_count;
53
54 struct nvgpu_ecc_stat **sm_shm_ecc_sec_count;
55 struct nvgpu_ecc_stat **sm_shm_ecc_sed_count;
56 struct nvgpu_ecc_stat **sm_shm_ecc_ded_count;
57
58 struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe0_count;
59 struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe0_count;
60 struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe0_count;
61 struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe0_count;
62 struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe1_count;
63 struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe1_count;
64 struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe1_count;
65 struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe1_count;
66
67 struct nvgpu_ecc_stat **sm_l1_tag_ecc_corrected_err_count;
68 struct nvgpu_ecc_stat **sm_l1_tag_ecc_uncorrected_err_count;
69 struct nvgpu_ecc_stat **sm_cbu_ecc_corrected_err_count;
70 struct nvgpu_ecc_stat **sm_cbu_ecc_uncorrected_err_count;
71 struct nvgpu_ecc_stat **sm_l1_data_ecc_corrected_err_count;
72 struct nvgpu_ecc_stat **sm_l1_data_ecc_uncorrected_err_count;
73 struct nvgpu_ecc_stat **sm_icache_ecc_corrected_err_count;
74 struct nvgpu_ecc_stat **sm_icache_ecc_uncorrected_err_count;
75
76 /* stats per gpc */
	/*
	 * Single pointers; these match the 'struct nvgpu_ecc_stat **'
	 * parameter of nvgpu_ecc_counter_init_per_gpc().
	 * Presumably indexed [gpc] -- TODO confirm.
	 */
77
78 struct nvgpu_ecc_stat *gcc_l15_ecc_corrected_err_count;
79 struct nvgpu_ecc_stat *gcc_l15_ecc_uncorrected_err_count;
80
81 struct nvgpu_ecc_stat *gpccs_ecc_corrected_err_count;
82 struct nvgpu_ecc_stat *gpccs_ecc_uncorrected_err_count;
83 struct nvgpu_ecc_stat *mmu_l1tlb_ecc_corrected_err_count;
84 struct nvgpu_ecc_stat *mmu_l1tlb_ecc_uncorrected_err_count;
85
86 /* stats per device */
87 struct nvgpu_ecc_stat *fecs_ecc_corrected_err_count;
88 struct nvgpu_ecc_stat *fecs_ecc_uncorrected_err_count;
89 } gr; /* reached through NVGPU_ECC_COUNTER_INIT_PER_TPC/_PER_GPC/_GR */
90
91 struct {
92 /* stats per lts */
93 struct nvgpu_ecc_stat **ecc_sec_count;
94 struct nvgpu_ecc_stat **ecc_ded_count;
95 } ltc; /* reached through NVGPU_ECC_COUNTER_INIT_PER_LTS */
96
97 struct {
98 /* stats per device */
99 struct nvgpu_ecc_stat *mmu_l2tlb_ecc_corrected_err_count;
100 struct nvgpu_ecc_stat *mmu_l2tlb_ecc_uncorrected_err_count;
101 struct nvgpu_ecc_stat *mmu_hubtlb_ecc_corrected_err_count;
102 struct nvgpu_ecc_stat *mmu_hubtlb_ecc_uncorrected_err_count;
103 struct nvgpu_ecc_stat *mmu_fillunit_ecc_corrected_err_count;
104 struct nvgpu_ecc_stat *mmu_fillunit_ecc_uncorrected_err_count;
105 } fb; /* reached through NVGPU_ECC_COUNTER_INIT_FB */
106
107 struct {
108 /* stats per device */
109 struct nvgpu_ecc_stat *pmu_ecc_corrected_err_count;
110 struct nvgpu_ecc_stat *pmu_ecc_uncorrected_err_count;
111 } pmu; /* reached through NVGPU_ECC_COUNTER_INIT_PMU */
112
113 struct {
114 /* stats per fbpa */
115 struct nvgpu_ecc_stat *fbpa_ecc_sec_err_count;
116 struct nvgpu_ecc_stat *fbpa_ecc_ded_err_count;
117 } fbpa; /* reached through NVGPU_ECC_COUNTER_INIT_PER_FBPA */
118
	/*
	 * NOTE(review): presumably the head of the list that links every
	 * nvgpu_ecc_stat through its 'node' member, together with the number
	 * of linked entries -- confirm against the implementation.
	 */
119 struct nvgpu_list_node stats_list;
120 int stats_count;
121};
122
/*
 * Initialise a per-TPC ECC counter.
 *
 * @g:    GPU instance.
 * @stat: address of the 'struct nvgpu_ecc_stat **' member to set up.
 * @name: name for the counter.
 *
 * Returns an int status; presumably 0 on success and a negative error code
 * on failure -- confirm against the implementation.
 */
123int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
124 struct nvgpu_ecc_stat ***stat, const char *name);
/*
 * Convenience wrapper for members of g->ecc.gr. It relies on a variable
 * named 'g' being in scope at the expansion site, and passes the member name
 * itself, stringified with '#', as the counter name.
 */
125#define NVGPU_ECC_COUNTER_INIT_PER_TPC(stat) \
126 nvgpu_ecc_counter_init_per_tpc(g, &g->ecc.gr.stat, #stat)
127
/*
 * Initialise a per-GPC ECC counter.
 *
 * @g:    GPU instance.
 * @stat: address of the 'struct nvgpu_ecc_stat *' member to set up.
 * @name: name for the counter.
 *
 * Returns an int status; presumably 0 on success and a negative error code
 * on failure -- confirm against the implementation.
 */
128int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
129 struct nvgpu_ecc_stat **stat, const char *name);
/*
 * Convenience wrapper for members of g->ecc.gr. It relies on a variable
 * named 'g' being in scope at the expansion site, and passes the member name
 * itself, stringified with '#', as the counter name.
 */
130#define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \
131 nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat)
132
/*
 * Initialise a single ECC counter (the "stats per device" members).
 *
 * @g:    GPU instance.
 * @stat: address of the 'struct nvgpu_ecc_stat *' member to set up.
 * @name: name for the counter.
 *
 * Returns an int status; presumably 0 on success and a negative error code
 * on failure -- confirm against the implementation.
 */
133int nvgpu_ecc_counter_init(struct gk20a *g,
134 struct nvgpu_ecc_stat **stat, const char *name);
/*
 * Convenience wrappers, one per group of g->ecc (gr, fb, pmu). Each relies
 * on a variable named 'g' being in scope at the expansion site, and passes
 * the member name itself, stringified with '#', as the counter name.
 */
135#define NVGPU_ECC_COUNTER_INIT_GR(stat) \
136 nvgpu_ecc_counter_init(g, &g->ecc.gr.stat, #stat)
137#define NVGPU_ECC_COUNTER_INIT_FB(stat) \
138 nvgpu_ecc_counter_init(g, &g->ecc.fb.stat, #stat)
139#define NVGPU_ECC_COUNTER_INIT_PMU(stat) \
140 nvgpu_ecc_counter_init(g, &g->ecc.pmu.stat, #stat)
141
/*
 * Initialise a per-LTS ECC counter.
 *
 * @g:    GPU instance.
 * @stat: address of the 'struct nvgpu_ecc_stat **' member to set up.
 * @name: name for the counter.
 *
 * Returns an int status; presumably 0 on success and a negative error code
 * on failure -- confirm against the implementation.
 */
142int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
143 struct nvgpu_ecc_stat ***stat, const char *name);
/*
 * Convenience wrapper for members of g->ecc.ltc. It relies on a variable
 * named 'g' being in scope at the expansion site, and passes the member name
 * itself, stringified with '#', as the counter name.
 */
144#define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \
145 nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat)
146
/*
 * Initialise a per-FBPA ECC counter.
 *
 * @g:    GPU instance.
 * @stat: address of the 'struct nvgpu_ecc_stat *' member to set up.
 * @name: name for the counter.
 *
 * Returns an int status; presumably 0 on success and a negative error code
 * on failure -- confirm against the implementation.
 */
147int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
148 struct nvgpu_ecc_stat **stat, const char *name);
/*
 * Convenience wrapper for members of g->ecc.fbpa. It relies on a variable
 * named 'g' being in scope at the expansion site, and passes the member name
 * itself, stringified with '#', as the counter name.
 */
149#define NVGPU_ECC_COUNTER_INIT_PER_FBPA(stat) \
150 nvgpu_ecc_counter_init_per_fbpa(g, &g->ecc.fbpa.stat, #stat)
151
/*
 * Release the ECC counters associated with @g.
 * NOTE(review): the exact set of resources freed is not visible from this
 * header -- confirm against the implementation.
 */
152void nvgpu_ecc_free(struct gk20a *g);
153
/*
 * Set up / tear down ECC support for @g. nvgpu_ecc_init_support() returns an
 * int status; presumably 0 on success and a negative error code on failure
 * -- confirm against the implementation.
 */
154int nvgpu_ecc_init_support(struct gk20a *g);
155void nvgpu_ecc_remove_support(struct gk20a *g);
156
157/* OSes to implement */
/*
 * The two hooks below are only declared here; each OS-specific layer is
 * expected to supply the definitions. nvgpu_ecc_sysfs_init() returns an int
 * status; presumably 0 on success and a negative error code on failure --
 * confirm against the OS implementation.
 */
158
159int nvgpu_ecc_sysfs_init(struct gk20a *g);
160void nvgpu_ecc_sysfs_remove(struct gk20a *g);
161
162#endif