diff options
Diffstat (limited to 'drivers/gpu/nvgpu/os')
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/ecc_linux.h | 49 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/module.c | 22 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/os_linux.h | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/sdl.c | 341 |
4 files changed, 416 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/ecc_linux.h b/drivers/gpu/nvgpu/os/linux/ecc_linux.h new file mode 100644 index 00000000..7e0f650b --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ecc_linux.h | |||
@@ -0,0 +1,49 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
9 | * and/or sell copies of the Software, and to permit persons to whom the | ||
10 | * Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice shall be included in | ||
13 | * all copies or substantial portions of the Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
21 | * DEALINGS IN THE SOFTWARE. | ||
22 | */ | ||
23 | |||
24 | #ifndef NVGPU_OS_ECC_LINUX_H | ||
25 | #define NVGPU_OS_ECC_LINUX_H | ||
26 | |||
27 | #ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING | ||
28 | |||
29 | #include <linux/tegra_l1ss_kernel_interface.h> | ||
30 | #include <linux/tegra_l1ss_ioctl.h> | ||
31 | #include <linux/tegra_nv_guard_service_id.h> | ||
32 | #include <linux/tegra_nv_guard_group_id.h> | ||
33 | |||
34 | #include <nvgpu/nvgpu_err.h> | ||
35 | |||
36 | struct nvgpu_ecc_reporting_linux { | ||
37 | struct nvgpu_ecc_reporting common; | ||
38 | client_param_t priv; | ||
39 | }; | ||
40 | |||
41 | static inline struct nvgpu_ecc_reporting_linux *get_ecc_reporting_linux( | ||
42 | struct nvgpu_ecc_reporting *ecc_report) | ||
43 | { | ||
44 | return container_of(ecc_report, struct nvgpu_ecc_reporting_linux, common); | ||
45 | } | ||
46 | |||
47 | #endif /* CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING */ | ||
48 | |||
49 | #endif \ No newline at end of file | ||
diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index 807df2ca..fdbab46d 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GK20A Graphics | 2 | * GK20A Graphics |
3 | * | 3 | * |
4 | * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -49,6 +49,7 @@ | |||
49 | #include <nvgpu/clk_arb.h> | 49 | #include <nvgpu/clk_arb.h> |
50 | #include <nvgpu/timers.h> | 50 | #include <nvgpu/timers.h> |
51 | #include <nvgpu/channel.h> | 51 | #include <nvgpu/channel.h> |
52 | #include <nvgpu/nvgpu_err.h> | ||
52 | 53 | ||
53 | #include "platform_gk20a.h" | 54 | #include "platform_gk20a.h" |
54 | #include "sysfs.h" | 55 | #include "sysfs.h" |
@@ -355,6 +356,10 @@ int gk20a_pm_finalize_poweron(struct device *dev) | |||
355 | gk20a_init_cde_support(l); | 356 | gk20a_init_cde_support(l); |
356 | #endif | 357 | #endif |
357 | 358 | ||
359 | #ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING | ||
360 | nvgpu_enable_ecc_reporting(g); | ||
361 | #endif | ||
362 | |||
358 | err = gk20a_sched_ctrl_init(g); | 363 | err = gk20a_sched_ctrl_init(g); |
359 | if (err) { | 364 | if (err) { |
360 | nvgpu_err(g, "failed to init sched control"); | 365 | nvgpu_err(g, "failed to init sched control"); |
@@ -364,9 +369,14 @@ int gk20a_pm_finalize_poweron(struct device *dev) | |||
364 | g->sw_ready = true; | 369 | g->sw_ready = true; |
365 | 370 | ||
366 | done: | 371 | done: |
367 | if (err) | 372 | if (err) { |
368 | g->power_on = false; | 373 | g->power_on = false; |
369 | 374 | ||
375 | #ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING | ||
376 | nvgpu_disable_ecc_reporting(g); | ||
377 | #endif | ||
378 | } | ||
379 | |||
370 | nvgpu_mutex_release(&g->power_lock); | 380 | nvgpu_mutex_release(&g->power_lock); |
371 | return err; | 381 | return err; |
372 | } | 382 | } |
@@ -433,6 +443,10 @@ static int gk20a_pm_prepare_poweroff(struct device *dev) | |||
433 | /* Stop CPU from accessing the GPU registers. */ | 443 | /* Stop CPU from accessing the GPU registers. */ |
434 | gk20a_lockout_registers(g); | 444 | gk20a_lockout_registers(g); |
435 | 445 | ||
446 | #ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING | ||
447 | nvgpu_disable_ecc_reporting(g); | ||
448 | #endif | ||
449 | |||
436 | nvgpu_hide_usermode_for_poweroff(g); | 450 | nvgpu_hide_usermode_for_poweroff(g); |
437 | nvgpu_mutex_release(&g->power_lock); | 451 | nvgpu_mutex_release(&g->power_lock); |
438 | return 0; | 452 | return 0; |
@@ -1382,6 +1396,10 @@ static int gk20a_probe(struct platform_device *dev) | |||
1382 | goto return_err; | 1396 | goto return_err; |
1383 | } | 1397 | } |
1384 | 1398 | ||
1399 | #ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING | ||
1400 | nvgpu_init_ecc_reporting(gk20a); | ||
1401 | #endif | ||
1402 | |||
1385 | gk20a->nvgpu_reboot_nb.notifier_call = | 1403 | gk20a->nvgpu_reboot_nb.notifier_call = |
1386 | nvgpu_kernel_shutdown_notification; | 1404 | nvgpu_kernel_shutdown_notification; |
1387 | err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb); | 1405 | err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb); |
diff --git a/drivers/gpu/nvgpu/os/linux/os_linux.h b/drivers/gpu/nvgpu/os/linux/os_linux.h index 25c6c03a..adcfdb2f 100644 --- a/drivers/gpu/nvgpu/os/linux/os_linux.h +++ b/drivers/gpu/nvgpu/os/linux/os_linux.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
@@ -25,6 +25,7 @@ | |||
25 | 25 | ||
26 | #include "cde.h" | 26 | #include "cde.h" |
27 | #include "sched.h" | 27 | #include "sched.h" |
28 | #include "ecc_linux.h" | ||
28 | 29 | ||
29 | struct nvgpu_os_linux_ops { | 30 | struct nvgpu_os_linux_ops { |
30 | struct { | 31 | struct { |
@@ -134,6 +135,10 @@ struct nvgpu_os_linux { | |||
134 | 135 | ||
135 | u64 regs_bus_addr; | 136 | u64 regs_bus_addr; |
136 | 137 | ||
138 | #ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING | ||
139 | struct nvgpu_ecc_reporting_linux ecc_reporting_linux; | ||
140 | #endif | ||
141 | |||
137 | struct nvgpu_os_linux_ops ops; | 142 | struct nvgpu_os_linux_ops ops; |
138 | 143 | ||
139 | #ifdef CONFIG_DEBUG_FS | 144 | #ifdef CONFIG_DEBUG_FS |
diff --git a/drivers/gpu/nvgpu/os/linux/sdl.c b/drivers/gpu/nvgpu/os/linux/sdl.c new file mode 100644 index 00000000..c4dccdc6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sdl.c | |||
@@ -0,0 +1,341 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2021, NVIDIA Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <nvgpu/gk20a.h> | ||
18 | #include <nvgpu/types.h> | ||
19 | #include <nvgpu/nvgpu_err.h> | ||
20 | #include <nvgpu/timers.h> | ||
21 | #include <nvgpu/bug.h> | ||
22 | |||
23 | #include "ecc_linux.h" | ||
24 | #include "os_linux.h" | ||
25 | #include "module.h" | ||
26 | |||
27 | /* This look-up table initializes the list of hw units and their errors. | ||
28 | * It also specifies the error injection mechanism supported, for each error. | ||
29 | * In case of hw error injection support, this initialization will be overriden | ||
30 | * by the values provided from the hal layes of corresponding hw units. | ||
31 | */ | ||
32 | static struct nvgpu_err_hw_module gv11b_err_lut[] = { | ||
33 | { | ||
34 | .name = "sm", | ||
35 | .hw_unit = (u32)NVGPU_ERR_MODULE_SM, | ||
36 | .num_errs = 21U, | ||
37 | .base_ecc_service_id = | ||
38 | NVGUARD_SERVICE_IGPU_SM_SWERR_L1_TAG_ECC_CORRECTED, | ||
39 | .errs = (struct nvgpu_err_desc[]) { | ||
40 | GPU_NONCRITERR("l1_tag_ecc_corrected", | ||
41 | GPU_SM_L1_TAG_ECC_CORRECTED, 0, 0), | ||
42 | GPU_CRITERR("l1_tag_ecc_uncorrected", | ||
43 | GPU_SM_L1_TAG_ECC_UNCORRECTED, 0, 0), | ||
44 | GPU_NONCRITERR("cbu_ecc_corrected", 0, 0, 0), | ||
45 | GPU_CRITERR("cbu_ecc_uncorrected", | ||
46 | GPU_SM_CBU_ECC_UNCORRECTED, 0, 0), | ||
47 | GPU_NONCRITERR("lrf_ecc_corrected", 0, 0, 0), | ||
48 | GPU_CRITERR("lrf_ecc_uncorrected", | ||
49 | GPU_SM_LRF_ECC_UNCORRECTED, 0, 0), | ||
50 | GPU_NONCRITERR("l1_data_ecc_corrected", 0, 0, 0), | ||
51 | GPU_CRITERR("l1_data_ecc_uncorrected", | ||
52 | GPU_SM_L1_DATA_ECC_UNCORRECTED, 0, 0), | ||
53 | GPU_NONCRITERR("icache_l0_data_ecc_corrected", 0, 0, 0), | ||
54 | GPU_CRITERR("icache_l0_data_ecc_uncorrected", | ||
55 | GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED, 0, 0), | ||
56 | GPU_NONCRITERR("icache_l1_data_ecc_corrected", 0, 0, 0), | ||
57 | GPU_CRITERR("icache_l1_data_ecc_uncorrected", | ||
58 | GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED, 0, 0), | ||
59 | GPU_NONCRITERR("icache_l0_predecode_ecc_corrected", 0, 0, 0), | ||
60 | GPU_CRITERR("icache_l0_predecode_ecc_uncorrected", | ||
61 | GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED, 0, 0), | ||
62 | GPU_NONCRITERR("l1_tag_miss_fifo_ecc_corrected", 0, 0, 0), | ||
63 | GPU_CRITERR("l1_tag_miss_fifo_ecc_uncorrected", | ||
64 | GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED, 0, 0), | ||
65 | GPU_NONCRITERR("l1_tag_s2r_pixprf_ecc_corrected", 0, 0, 0), | ||
66 | GPU_CRITERR("l1_tag_s2r_pixprf_ecc_uncorrected", | ||
67 | GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED, 0, 0), | ||
68 | GPU_CRITERR("machine_check_error", 0, 0, 0), | ||
69 | GPU_NONCRITERR("icache_l1_predecode_ecc_corrected", 0, 0, 0), | ||
70 | GPU_CRITERR("icache_l1_predecode_ecc_uncorrected", | ||
71 | GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED, 0, 0), | ||
72 | }, | ||
73 | }, | ||
74 | { | ||
75 | .name = "fecs", | ||
76 | .hw_unit = (u32)NVGPU_ERR_MODULE_FECS, | ||
77 | .num_errs = 4U, | ||
78 | .base_ecc_service_id = | ||
79 | NVGUARD_SERVICE_IGPU_FECS_SWERR_FALCON_IMEM_ECC_CORRECTED, | ||
80 | .errs = (struct nvgpu_err_desc[]) { | ||
81 | GPU_NONCRITERR("falcon_imem_ecc_corrected", | ||
82 | GPU_FECS_FALCON_IMEM_ECC_CORRECTED, 0, 0), | ||
83 | GPU_CRITERR("falcon_imem_ecc_uncorrected", | ||
84 | GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED, 0, 0), | ||
85 | GPU_NONCRITERR("falcon_dmem_ecc_corrected", 0, 0, 0), | ||
86 | GPU_CRITERR("falcon_dmem_ecc_uncorrected", | ||
87 | GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED, 0, 0), | ||
88 | }, | ||
89 | }, | ||
90 | { | ||
91 | .name = "pmu", | ||
92 | .hw_unit = NVGPU_ERR_MODULE_PMU, | ||
93 | .num_errs = 4U, | ||
94 | .base_ecc_service_id = | ||
95 | NVGUARD_SERVICE_IGPU_PMU_SWERR_FALCON_IMEM_ECC_CORRECTED, | ||
96 | .errs = (struct nvgpu_err_desc[]) { | ||
97 | GPU_NONCRITERR("falcon_imem_ecc_corrected", | ||
98 | GPU_PMU_FALCON_IMEM_ECC_CORRECTED, 0, 0), | ||
99 | GPU_CRITERR("falcon_imem_ecc_uncorrected", | ||
100 | GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED, 0, 0), | ||
101 | GPU_NONCRITERR("falcon_dmem_ecc_corrected", 0, 0, 0), | ||
102 | GPU_CRITERR("falcon_dmem_ecc_uncorrected", | ||
103 | GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED, 0, 0), | ||
104 | }, | ||
105 | }, | ||
106 | }; | ||
107 | |||
108 | static void nvgpu_init_err_msg_header(struct gpu_err_header *header) | ||
109 | { | ||
110 | header->version.major = (u16)1U; | ||
111 | header->version.minor = (u16)0U; | ||
112 | header->sub_err_type = 0U; | ||
113 | header->sub_unit_id = 0UL; | ||
114 | header->address = 0UL; | ||
115 | header->timestamp_ns = 0UL; | ||
116 | } | ||
117 | |||
118 | static void nvgpu_init_ecc_err_msg(struct gpu_ecc_error_info *err_info) | ||
119 | { | ||
120 | nvgpu_init_err_msg_header(&err_info->header); | ||
121 | err_info->err_cnt = 0UL; | ||
122 | } | ||
123 | |||
124 | static void nvgpu_report_ecc_error_linux(struct gk20a *g, u32 hw_unit, u32 inst, | ||
125 | u32 err_id, u64 err_addr, u64 err_count) | ||
126 | { | ||
127 | int err = 0; | ||
128 | u32 s_id = 0; | ||
129 | u8 err_status = 0; | ||
130 | u8 err_info_size = 0; | ||
131 | u64 timestamp = 0ULL; | ||
132 | int err_threshold_counter = 0; | ||
133 | struct gpu_ecc_error_info err_pkt; | ||
134 | struct nvgpu_err_desc *err_desc = NULL; | ||
135 | struct nvgpu_err_hw_module *hw_module = NULL; | ||
136 | nv_guard_request_t req; | ||
137 | |||
138 | memset(&req, 0, sizeof(req)); | ||
139 | nvgpu_init_ecc_err_msg(&err_pkt); | ||
140 | if (hw_unit >= sizeof(gv11b_err_lut)/sizeof(gv11b_err_lut[0])) { | ||
141 | err = -EINVAL; | ||
142 | goto done; | ||
143 | } | ||
144 | |||
145 | hw_module = &gv11b_err_lut[hw_unit]; | ||
146 | if (err_id >= hw_module->num_errs) { | ||
147 | nvgpu_err(g, "invalid err_id (%u) for hw module (%u)", | ||
148 | err_id, hw_module->hw_unit); | ||
149 | err = -EINVAL; | ||
150 | goto done; | ||
151 | } | ||
152 | err_desc = &hw_module->errs[err_id]; | ||
153 | timestamp = (u64)nvgpu_current_time_ns(); | ||
154 | |||
155 | err_pkt.header.timestamp_ns = timestamp; | ||
156 | err_pkt.header.sub_unit_id = inst; | ||
157 | err_pkt.header.address = err_addr; | ||
158 | err_pkt.err_cnt = err_count; | ||
159 | err_info_size = sizeof(err_pkt); | ||
160 | |||
161 | s_id = hw_module->base_ecc_service_id + err_id; | ||
162 | |||
163 | if (err_desc->is_critical) { | ||
164 | err_status = NVGUARD_ERROR_DETECTED; | ||
165 | } else { | ||
166 | err_status = NVGUARD_NO_ERROR; | ||
167 | } | ||
168 | |||
169 | nvgpu_atomic_inc(&err_desc->err_count); | ||
170 | err_threshold_counter = nvgpu_atomic_cmpxchg(&err_desc->err_count, | ||
171 | err_desc->err_threshold + 1, 0); | ||
172 | |||
173 | if (unlikely(err_threshold_counter != err_desc->err_threshold + 1)) { | ||
174 | goto done; | ||
175 | } | ||
176 | |||
177 | nvgpu_log(g, gpu_dbg_ecc, "ECC reporting hw: %s, desc:%s, count:%llu", | ||
178 | hw_module->name, err_desc->name, err_count); | ||
179 | |||
180 | req.srv_id_cmd = NVGUARD_SERVICESTATUS_NOTIFICATION; | ||
181 | req.srv_status.srv_id = (nv_guard_service_id_t)s_id; | ||
182 | req.srv_status.status = err_status; | ||
183 | req.srv_status.timestamp = timestamp; | ||
184 | req.srv_status.error_info_size = err_info_size; | ||
185 | memcpy(req.srv_status.error_info, (u8*)&err_pkt, err_info_size); | ||
186 | |||
187 | /* | ||
188 | * l1ss_submit_rq may fail due to kmalloc failures but may pass in | ||
189 | * subsequent calls | ||
190 | */ | ||
191 | err = l1ss_submit_rq(&req, true); | ||
192 | if (err != 0) { | ||
193 | nvgpu_err(g, "Error returned from L1SS submit %d", err); | ||
194 | } | ||
195 | |||
196 | if (err_desc->is_critical) { | ||
197 | nvgpu_quiesce(g); | ||
198 | } | ||
199 | |||
200 | done: | ||
201 | return; | ||
202 | } | ||
203 | |||
204 | static void nvgpu_report_ecc_error_empty(struct gk20a *g, u32 hw_unit, u32 inst, | ||
205 | u32 err_id, u64 err_addr, u64 err_count) { | ||
206 | nvgpu_log(g, gpu_dbg_ecc, "ECC reporting empty"); | ||
207 | } | ||
208 | |||
209 | const struct nvgpu_ecc_reporting_ops default_disabled_ecc_report_ops = { | ||
210 | .report_ecc_err = nvgpu_report_ecc_error_empty, | ||
211 | }; | ||
212 | |||
213 | const struct nvgpu_ecc_reporting_ops ecc_enable_report_ops = { | ||
214 | .report_ecc_err = nvgpu_report_ecc_error_linux, | ||
215 | }; | ||
216 | |||
217 | static int nvgpu_l1ss_callback(l1ss_cli_callback_param param, void *data) | ||
218 | { | ||
219 | struct gk20a *g = (struct gk20a *)data; | ||
220 | struct nvgpu_os_linux *l = NULL; | ||
221 | struct nvgpu_ecc_reporting_linux *ecc_reporting_linux = NULL; | ||
222 | int err = 0; | ||
223 | /* Ensure we have a valid gk20a struct before proceeding */ | ||
224 | if ((g == NULL) || (gk20a_get(g) == NULL)) { | ||
225 | return -ENODEV; | ||
226 | } | ||
227 | |||
228 | l = nvgpu_os_linux_from_gk20a(g); | ||
229 | ecc_reporting_linux = &l->ecc_reporting_linux; | ||
230 | |||
231 | nvgpu_spinlock_acquire(&ecc_reporting_linux->common.lock); | ||
232 | if (param == L1SS_READY) { | ||
233 | if (!ecc_reporting_linux->common.ecc_reporting_service_enabled) { | ||
234 | ecc_reporting_linux->common.ecc_reporting_service_enabled = true; | ||
235 | ecc_reporting_linux->common.ops = &ecc_enable_report_ops; | ||
236 | nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is enabled"); | ||
237 | } | ||
238 | } else if (param == L1SS_NOT_READY) { | ||
239 | if (ecc_reporting_linux->common.ecc_reporting_service_enabled) { | ||
240 | ecc_reporting_linux->common.ecc_reporting_service_enabled = false; | ||
241 | ecc_reporting_linux->common.ops = &default_disabled_ecc_report_ops; | ||
242 | nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is disabled"); | ||
243 | } | ||
244 | } else { | ||
245 | err = -EINVAL; | ||
246 | } | ||
247 | nvgpu_spinlock_release(&ecc_reporting_linux->common.lock); | ||
248 | |||
249 | gk20a_put(g); | ||
250 | |||
251 | return err; | ||
252 | } | ||
253 | |||
254 | void nvgpu_init_ecc_reporting(struct gk20a *g) | ||
255 | { | ||
256 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
257 | struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux; | ||
258 | int err = 0; | ||
259 | /* This will invoke the registration API */ | ||
260 | nvgpu_spinlock_init(&ecc_report_linux->common.lock); | ||
261 | ecc_report_linux->priv.id = (NVGUARD_GROUPID_IGPU & NVGUARD_GROUPINDEX_FIELDMASK); | ||
262 | ecc_report_linux->priv.cli_callback = nvgpu_l1ss_callback; | ||
263 | ecc_report_linux->priv.data = g; | ||
264 | ecc_report_linux->common.ops = &default_disabled_ecc_report_ops; | ||
265 | |||
266 | nvgpu_log(g, gpu_dbg_ecc, "ECC reporting Init"); | ||
267 | |||
268 | /* | ||
269 | * err == 0 indicates service is available but not active yet. | ||
270 | * err == 1 indicates service is available and active | ||
271 | * error for other cases. | ||
272 | */ | ||
273 | err = l1ss_register_client(&ecc_report_linux->priv); | ||
274 | if (err == 0) { | ||
275 | ecc_report_linux->common.ecc_reporting_service_enabled = false; | ||
276 | nvgpu_log(g, gpu_dbg_ecc, "ECC reporting init success"); | ||
277 | } else if (err == 1) { | ||
278 | ecc_report_linux->common.ecc_reporting_service_enabled = true; | ||
279 | /* Actual Ops will be replaced during nvgpu_enable_ecc_reporting | ||
280 | * called as part of gk20a_busy() | ||
281 | */ | ||
282 | } else { | ||
283 | nvgpu_log(g, gpu_dbg_ecc, "ECC reporting init failure %d", err); | ||
284 | } | ||
285 | } | ||
286 | |||
287 | void nvgpu_deinit_ecc_reporting(struct gk20a *g) | ||
288 | { | ||
289 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
290 | struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux; | ||
291 | |||
292 | if (ecc_report_linux->common.ecc_reporting_service_enabled) { | ||
293 | ecc_report_linux->common.ecc_reporting_service_enabled = false; | ||
294 | l1ss_deregister_client(ecc_report_linux->priv.id); | ||
295 | memset(ecc_report_linux, 0, sizeof(*ecc_report_linux)); | ||
296 | nvgpu_log(g, gpu_dbg_ecc, "ECC reporting de-init success"); | ||
297 | } | ||
298 | |||
299 | } | ||
300 | |||
301 | void nvgpu_enable_ecc_reporting(struct gk20a *g) | ||
302 | { | ||
303 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
304 | struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux; | ||
305 | struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common; | ||
306 | |||
307 | nvgpu_spinlock_acquire(&ecc_report_linux->common.lock); | ||
308 | if (error_reporting->ecc_reporting_service_enabled) { | ||
309 | error_reporting->ops = &ecc_enable_report_ops; | ||
310 | nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is enabled"); | ||
311 | } | ||
312 | nvgpu_spinlock_release(&ecc_report_linux->common.lock); | ||
313 | } | ||
314 | |||
315 | void nvgpu_disable_ecc_reporting(struct gk20a *g) | ||
316 | { | ||
317 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
318 | struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux; | ||
319 | struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common; | ||
320 | |||
321 | nvgpu_spinlock_acquire(&ecc_report_linux->common.lock); | ||
322 | error_reporting->ops = &default_disabled_ecc_report_ops; | ||
323 | nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is disabled"); | ||
324 | nvgpu_spinlock_release(&ecc_report_linux->common.lock); | ||
325 | } | ||
326 | |||
327 | void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst, | ||
328 | u32 err_id, u64 err_addr, u64 err_count) | ||
329 | { | ||
330 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
331 | struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux; | ||
332 | struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common; | ||
333 | void (*report_ecc_err_func)(struct gk20a *g, u32 hw_unit, u32 inst, | ||
334 | u32 err_id, u64 err_addr, u64 err_count); | ||
335 | |||
336 | nvgpu_spinlock_acquire(&ecc_report_linux->common.lock); | ||
337 | report_ecc_err_func = error_reporting->ops->report_ecc_err; | ||
338 | nvgpu_spinlock_release(&ecc_report_linux->common.lock); | ||
339 | |||
340 | report_ecc_err_func(g, hw_unit, inst, err_id, err_addr, err_count); | ||
341 | } | ||