diff options
Diffstat (limited to 'include/nvgpu/ecc.h')
-rw-r--r-- | include/nvgpu/ecc.h | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/include/nvgpu/ecc.h b/include/nvgpu/ecc.h new file mode 100644 index 0000000..9b211ef --- /dev/null +++ b/include/nvgpu/ecc.h | |||
@@ -0,0 +1,162 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #ifndef NVGPU_ECC_H | ||
24 | #define NVGPU_ECC_H | ||
25 | |||
26 | #include <nvgpu/types.h> | ||
27 | #include <nvgpu/list.h> | ||
28 | |||
/*
 * Number of chars reserved for the name[] buffer embedded in every
 * struct nvgpu_ecc_stat.
 */
#define NVGPU_ECC_STAT_NAME_MAX_SIZE	100

/* Forward declaration: this header only handles struct gk20a by pointer. */
struct gk20a;
32 | |||
33 | struct nvgpu_ecc_stat { | ||
34 | char name[NVGPU_ECC_STAT_NAME_MAX_SIZE]; | ||
35 | u32 counter; | ||
36 | struct nvgpu_list_node node; | ||
37 | }; | ||
38 | |||
39 | static inline struct nvgpu_ecc_stat *nvgpu_ecc_stat_from_node( | ||
40 | struct nvgpu_list_node *node) | ||
41 | { | ||
42 | return (struct nvgpu_ecc_stat *)( | ||
43 | (uintptr_t)node - offsetof(struct nvgpu_ecc_stat, node) | ||
44 | ); | ||
45 | } | ||
46 | |||
47 | struct nvgpu_ecc { | ||
48 | struct { | ||
49 | /* stats per tpc */ | ||
50 | |||
51 | struct nvgpu_ecc_stat **sm_lrf_ecc_single_err_count; | ||
52 | struct nvgpu_ecc_stat **sm_lrf_ecc_double_err_count; | ||
53 | |||
54 | struct nvgpu_ecc_stat **sm_shm_ecc_sec_count; | ||
55 | struct nvgpu_ecc_stat **sm_shm_ecc_sed_count; | ||
56 | struct nvgpu_ecc_stat **sm_shm_ecc_ded_count; | ||
57 | |||
58 | struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe0_count; | ||
59 | struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe0_count; | ||
60 | struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe0_count; | ||
61 | struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe0_count; | ||
62 | struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe1_count; | ||
63 | struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe1_count; | ||
64 | struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe1_count; | ||
65 | struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe1_count; | ||
66 | |||
67 | struct nvgpu_ecc_stat **sm_l1_tag_ecc_corrected_err_count; | ||
68 | struct nvgpu_ecc_stat **sm_l1_tag_ecc_uncorrected_err_count; | ||
69 | struct nvgpu_ecc_stat **sm_cbu_ecc_corrected_err_count; | ||
70 | struct nvgpu_ecc_stat **sm_cbu_ecc_uncorrected_err_count; | ||
71 | struct nvgpu_ecc_stat **sm_l1_data_ecc_corrected_err_count; | ||
72 | struct nvgpu_ecc_stat **sm_l1_data_ecc_uncorrected_err_count; | ||
73 | struct nvgpu_ecc_stat **sm_icache_ecc_corrected_err_count; | ||
74 | struct nvgpu_ecc_stat **sm_icache_ecc_uncorrected_err_count; | ||
75 | |||
76 | /* stats per gpc */ | ||
77 | |||
78 | struct nvgpu_ecc_stat *gcc_l15_ecc_corrected_err_count; | ||
79 | struct nvgpu_ecc_stat *gcc_l15_ecc_uncorrected_err_count; | ||
80 | |||
81 | struct nvgpu_ecc_stat *gpccs_ecc_corrected_err_count; | ||
82 | struct nvgpu_ecc_stat *gpccs_ecc_uncorrected_err_count; | ||
83 | struct nvgpu_ecc_stat *mmu_l1tlb_ecc_corrected_err_count; | ||
84 | struct nvgpu_ecc_stat *mmu_l1tlb_ecc_uncorrected_err_count; | ||
85 | |||
86 | /* stats per device */ | ||
87 | struct nvgpu_ecc_stat *fecs_ecc_corrected_err_count; | ||
88 | struct nvgpu_ecc_stat *fecs_ecc_uncorrected_err_count; | ||
89 | } gr; | ||
90 | |||
91 | struct { | ||
92 | /* stats per lts */ | ||
93 | struct nvgpu_ecc_stat **ecc_sec_count; | ||
94 | struct nvgpu_ecc_stat **ecc_ded_count; | ||
95 | } ltc; | ||
96 | |||
97 | struct { | ||
98 | /* stats per device */ | ||
99 | struct nvgpu_ecc_stat *mmu_l2tlb_ecc_corrected_err_count; | ||
100 | struct nvgpu_ecc_stat *mmu_l2tlb_ecc_uncorrected_err_count; | ||
101 | struct nvgpu_ecc_stat *mmu_hubtlb_ecc_corrected_err_count; | ||
102 | struct nvgpu_ecc_stat *mmu_hubtlb_ecc_uncorrected_err_count; | ||
103 | struct nvgpu_ecc_stat *mmu_fillunit_ecc_corrected_err_count; | ||
104 | struct nvgpu_ecc_stat *mmu_fillunit_ecc_uncorrected_err_count; | ||
105 | } fb; | ||
106 | |||
107 | struct { | ||
108 | /* stats per device */ | ||
109 | struct nvgpu_ecc_stat *pmu_ecc_corrected_err_count; | ||
110 | struct nvgpu_ecc_stat *pmu_ecc_uncorrected_err_count; | ||
111 | } pmu; | ||
112 | |||
113 | struct { | ||
114 | /* stats per fbpa */ | ||
115 | struct nvgpu_ecc_stat *fbpa_ecc_sec_err_count; | ||
116 | struct nvgpu_ecc_stat *fbpa_ecc_ded_err_count; | ||
117 | } fbpa; | ||
118 | |||
119 | struct nvgpu_list_node stats_list; | ||
120 | int stats_count; | ||
121 | }; | ||
122 | |||
/*
 * Set up a per-TPC counter set.
 *
 * @g:    GPU instance.
 * @stat: address of the 'struct nvgpu_ecc_stat **' member to initialise.
 * @name: name for the counters.
 *
 * Returns an int status.
 * NOTE(review): presumably 0 on success and a negative error code on
 * failure; confirm in the implementation.
 */
int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
		struct nvgpu_ecc_stat ***stat, const char *name);

/*
 * Convenience wrapper: initialise g->ecc.gr.<stat>, using the member name
 * itself as the counter name. Expects a variable named 'g' to be in scope
 * at the point of use.
 */
#define NVGPU_ECC_COUNTER_INIT_PER_TPC(stat) \
	nvgpu_ecc_counter_init_per_tpc(g, &g->ecc.gr.stat, #stat)
127 | |||
/*
 * Set up a per-GPC counter set.
 *
 * @g:    GPU instance.
 * @stat: address of the 'struct nvgpu_ecc_stat *' member to initialise.
 * @name: name for the counters.
 *
 * Returns an int status.
 * NOTE(review): presumably 0 on success and a negative error code on
 * failure; confirm in the implementation.
 */
int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
		struct nvgpu_ecc_stat **stat, const char *name);

/*
 * Convenience wrapper: initialise g->ecc.gr.<stat>, using the member name
 * itself as the counter name. Expects a variable named 'g' to be in scope
 * at the point of use.
 */
#define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \
	nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat)
132 | |||
/*
 * Set up a single counter.
 *
 * @g:    GPU instance.
 * @stat: address of the 'struct nvgpu_ecc_stat *' member to initialise.
 * @name: name for the counter.
 *
 * Returns an int status.
 * NOTE(review): presumably 0 on success and a negative error code on
 * failure; confirm in the implementation.
 */
int nvgpu_ecc_counter_init(struct gk20a *g,
		struct nvgpu_ecc_stat **stat, const char *name);

/*
 * Convenience wrappers around nvgpu_ecc_counter_init() for members of
 * g->ecc.gr, g->ecc.fb and g->ecc.pmu respectively. Each one passes the
 * member name itself as the counter name and expects a variable named 'g'
 * to be in scope at the point of use.
 */
#define NVGPU_ECC_COUNTER_INIT_GR(stat) \
	nvgpu_ecc_counter_init(g, &g->ecc.gr.stat, #stat)
#define NVGPU_ECC_COUNTER_INIT_FB(stat) \
	nvgpu_ecc_counter_init(g, &g->ecc.fb.stat, #stat)
#define NVGPU_ECC_COUNTER_INIT_PMU(stat) \
	nvgpu_ecc_counter_init(g, &g->ecc.pmu.stat, #stat)
141 | |||
/*
 * Set up a per-LTS counter set.
 *
 * @g:    GPU instance.
 * @stat: address of the 'struct nvgpu_ecc_stat **' member to initialise.
 * @name: name for the counters.
 *
 * Returns an int status.
 * NOTE(review): presumably 0 on success and a negative error code on
 * failure; confirm in the implementation.
 */
int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
		struct nvgpu_ecc_stat ***stat, const char *name);

/*
 * Convenience wrapper: initialise g->ecc.ltc.<stat>, using the member name
 * itself as the counter name. Expects a variable named 'g' to be in scope
 * at the point of use.
 */
#define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \
	nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat)
146 | |||
/*
 * Set up a per-FBPA counter set.
 *
 * @g:    GPU instance.
 * @stat: address of the 'struct nvgpu_ecc_stat *' member to initialise.
 * @name: name for the counters.
 *
 * Returns an int status.
 * NOTE(review): presumably 0 on success and a negative error code on
 * failure; confirm in the implementation.
 */
int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
		struct nvgpu_ecc_stat **stat, const char *name);

/*
 * Convenience wrapper: initialise g->ecc.fbpa.<stat>, using the member name
 * itself as the counter name. Expects a variable named 'g' to be in scope
 * at the point of use.
 */
#define NVGPU_ECC_COUNTER_INIT_PER_FBPA(stat) \
	nvgpu_ecc_counter_init_per_fbpa(g, &g->ecc.fbpa.stat, #stat)
151 | |||
/*
 * Release the ECC counters belonging to @g.
 * NOTE(review): the exact set of resources freed is decided by the
 * implementation, which is not visible in this header.
 */
void nvgpu_ecc_free(struct gk20a *g);

/*
 * Bring up / tear down ECC support for @g.
 * nvgpu_ecc_init_support() returns an int status.
 * NOTE(review): presumably 0 on success and a negative error code on
 * failure; confirm in the implementation.
 */
int nvgpu_ecc_init_support(struct gk20a *g);
void nvgpu_ecc_remove_support(struct gk20a *g);
156 | |||
/*
 * The functions below have no common implementation: each OS port is
 * expected to provide its own.
 *
 * nvgpu_ecc_sysfs_init() returns an int status.
 * NOTE(review): presumably 0 on success and a negative error code on
 * failure; confirm in the OS-specific implementation.
 */

int nvgpu_ecc_sysfs_init(struct gk20a *g);
void nvgpu_ecc_sysfs_remove(struct gk20a *g);
161 | |||
162 | #endif | ||