Diffstat (limited to 'include/os/linux/sdl.c')
-rw-r--r--  include/os/linux/sdl.c  341
1 file changed, 0 insertions, 341 deletions
diff --git a/include/os/linux/sdl.c b/include/os/linux/sdl.c
deleted file mode 100644
index c4dccdc..0000000
--- a/include/os/linux/sdl.c
+++ /dev/null
@@ -1,341 +0,0 @@
/*
 * Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/gk20a.h>
#include <nvgpu/types.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/timers.h>
#include <nvgpu/bug.h>

#include "ecc_linux.h"
#include "os_linux.h"
#include "module.h"

/* This look-up table initializes the list of hw units and their errors.
 * It also specifies the error injection mechanism supported for each error.
 * In case of hw error injection support, this initialization will be
 * overridden by the values provided from the HAL layers of the corresponding
 * hw units.
 */
static struct nvgpu_err_hw_module gv11b_err_lut[] = {
	{
		.name = "sm",
		.hw_unit = (u32)NVGPU_ERR_MODULE_SM,
		.num_errs = 21U,
		.base_ecc_service_id =
			NVGUARD_SERVICE_IGPU_SM_SWERR_L1_TAG_ECC_CORRECTED,
		.errs = (struct nvgpu_err_desc[]) {
			GPU_NONCRITERR("l1_tag_ecc_corrected",
					GPU_SM_L1_TAG_ECC_CORRECTED, 0, 0),
			GPU_CRITERR("l1_tag_ecc_uncorrected",
					GPU_SM_L1_TAG_ECC_UNCORRECTED, 0, 0),
			GPU_NONCRITERR("cbu_ecc_corrected", 0, 0, 0),
			GPU_CRITERR("cbu_ecc_uncorrected",
					GPU_SM_CBU_ECC_UNCORRECTED, 0, 0),
			GPU_NONCRITERR("lrf_ecc_corrected", 0, 0, 0),
			GPU_CRITERR("lrf_ecc_uncorrected",
					GPU_SM_LRF_ECC_UNCORRECTED, 0, 0),
			GPU_NONCRITERR("l1_data_ecc_corrected", 0, 0, 0),
			GPU_CRITERR("l1_data_ecc_uncorrected",
					GPU_SM_L1_DATA_ECC_UNCORRECTED, 0, 0),
			GPU_NONCRITERR("icache_l0_data_ecc_corrected", 0, 0, 0),
			GPU_CRITERR("icache_l0_data_ecc_uncorrected",
					GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED, 0, 0),
			GPU_NONCRITERR("icache_l1_data_ecc_corrected", 0, 0, 0),
			GPU_CRITERR("icache_l1_data_ecc_uncorrected",
					GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED, 0, 0),
			GPU_NONCRITERR("icache_l0_predecode_ecc_corrected", 0, 0, 0),
			GPU_CRITERR("icache_l0_predecode_ecc_uncorrected",
					GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED, 0, 0),
			GPU_NONCRITERR("l1_tag_miss_fifo_ecc_corrected", 0, 0, 0),
			GPU_CRITERR("l1_tag_miss_fifo_ecc_uncorrected",
					GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED, 0, 0),
			GPU_NONCRITERR("l1_tag_s2r_pixprf_ecc_corrected", 0, 0, 0),
			GPU_CRITERR("l1_tag_s2r_pixprf_ecc_uncorrected",
					GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED, 0, 0),
			GPU_CRITERR("machine_check_error", 0, 0, 0),
			GPU_NONCRITERR("icache_l1_predecode_ecc_corrected", 0, 0, 0),
			GPU_CRITERR("icache_l1_predecode_ecc_uncorrected",
					GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED, 0, 0),
		},
	},
	{
		.name = "fecs",
		.hw_unit = (u32)NVGPU_ERR_MODULE_FECS,
		.num_errs = 4U,
		.base_ecc_service_id =
			NVGUARD_SERVICE_IGPU_FECS_SWERR_FALCON_IMEM_ECC_CORRECTED,
		.errs = (struct nvgpu_err_desc[]) {
			GPU_NONCRITERR("falcon_imem_ecc_corrected",
					GPU_FECS_FALCON_IMEM_ECC_CORRECTED, 0, 0),
			GPU_CRITERR("falcon_imem_ecc_uncorrected",
					GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED, 0, 0),
			GPU_NONCRITERR("falcon_dmem_ecc_corrected", 0, 0, 0),
			GPU_CRITERR("falcon_dmem_ecc_uncorrected",
					GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED, 0, 0),
		},
	},
	{
		.name = "pmu",
		.hw_unit = (u32)NVGPU_ERR_MODULE_PMU,
		.num_errs = 4U,
		.base_ecc_service_id =
			NVGUARD_SERVICE_IGPU_PMU_SWERR_FALCON_IMEM_ECC_CORRECTED,
		.errs = (struct nvgpu_err_desc[]) {
			GPU_NONCRITERR("falcon_imem_ecc_corrected",
					GPU_PMU_FALCON_IMEM_ECC_CORRECTED, 0, 0),
			GPU_CRITERR("falcon_imem_ecc_uncorrected",
					GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED, 0, 0),
			GPU_NONCRITERR("falcon_dmem_ecc_corrected", 0, 0, 0),
			GPU_CRITERR("falcon_dmem_ecc_uncorrected",
					GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED, 0, 0),
		},
	},
};

static void nvgpu_init_err_msg_header(struct gpu_err_header *header)
{
	header->version.major = (u16)1U;
	header->version.minor = (u16)0U;
	header->sub_err_type = 0U;
	header->sub_unit_id = 0UL;
	header->address = 0UL;
	header->timestamp_ns = 0UL;
}

static void nvgpu_init_ecc_err_msg(struct gpu_ecc_error_info *err_info)
{
	nvgpu_init_err_msg_header(&err_info->header);
	err_info->err_cnt = 0UL;
}

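/*
 * Report an ECC error to the L1SS safety service: look up the error
 * descriptor in gv11b_err_lut, build the error packet, and notify L1SS
 * once the per-error threshold is crossed. Critical errors additionally
 * put the GPU into quiesce.
 */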
static void nvgpu_report_ecc_error_linux(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_id, u64 err_addr, u64 err_count)
{
	int err = 0;
	u32 s_id = 0;
	u8 err_status = 0;
	u8 err_info_size = 0;
	u64 timestamp = 0ULL;
	int err_threshold_counter = 0;
	struct gpu_ecc_error_info err_pkt;
	struct nvgpu_err_desc *err_desc = NULL;
	struct nvgpu_err_hw_module *hw_module = NULL;
	nv_guard_request_t req;

	memset(&req, 0, sizeof(req));
	nvgpu_init_ecc_err_msg(&err_pkt);
	if (hw_unit >= sizeof(gv11b_err_lut)/sizeof(gv11b_err_lut[0])) {
		err = -EINVAL;
		goto done;
	}

	hw_module = &gv11b_err_lut[hw_unit];
	if (err_id >= hw_module->num_errs) {
		nvgpu_err(g, "invalid err_id (%u) for hw module (%u)",
				err_id, hw_module->hw_unit);
		err = -EINVAL;
		goto done;
	}
	err_desc = &hw_module->errs[err_id];
	timestamp = (u64)nvgpu_current_time_ns();

	err_pkt.header.timestamp_ns = timestamp;
	err_pkt.header.sub_unit_id = inst;
	err_pkt.header.address = err_addr;
	err_pkt.err_cnt = err_count;
	err_info_size = sizeof(err_pkt);

	s_id = hw_module->base_ecc_service_id + err_id;

	if (err_desc->is_critical) {
		err_status = NVGUARD_ERROR_DETECTED;
	} else {
		err_status = NVGUARD_NO_ERROR;
	}

	nvgpu_atomic_inc(&err_desc->err_count);
	err_threshold_counter = nvgpu_atomic_cmpxchg(&err_desc->err_count,
			err_desc->err_threshold + 1, 0);

	if (unlikely(err_threshold_counter != err_desc->err_threshold + 1)) {
		goto done;
	}

	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting hw: %s, desc:%s, count:%llu",
			hw_module->name, err_desc->name, err_count);

	req.srv_id_cmd = NVGUARD_SERVICESTATUS_NOTIFICATION;
	req.srv_status.srv_id = (nv_guard_service_id_t)s_id;
	req.srv_status.status = err_status;
	req.srv_status.timestamp = timestamp;
	req.srv_status.error_info_size = err_info_size;
	memcpy(req.srv_status.error_info, (u8 *)&err_pkt, err_info_size);

	/*
	 * l1ss_submit_rq() may fail due to kmalloc failures but may succeed
	 * in subsequent calls.
	 */
	err = l1ss_submit_rq(&req, true);
	if (err != 0) {
		nvgpu_err(g, "Error returned from L1SS submit %d", err);
	}

	if (err_desc->is_critical) {
		nvgpu_quiesce(g);
	}

done:
	return;
}

static void nvgpu_report_ecc_error_empty(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_id, u64 err_addr, u64 err_count)
{
	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting empty");
}

const struct nvgpu_ecc_reporting_ops default_disabled_ecc_report_ops = {
	.report_ecc_err = nvgpu_report_ecc_error_empty,
};

const struct nvgpu_ecc_reporting_ops ecc_enable_report_ops = {
	.report_ecc_err = nvgpu_report_ecc_error_linux,
};

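/*
 * L1SS status callback: switch the ECC reporting ops between the enabled
 * and disabled implementations as the L1SS service becomes ready or goes
 * away.
 */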
static int nvgpu_l1ss_callback(l1ss_cli_callback_param param, void *data)
{
	struct gk20a *g = (struct gk20a *)data;
	struct nvgpu_os_linux *l = NULL;
	struct nvgpu_ecc_reporting_linux *ecc_reporting_linux = NULL;
	int err = 0;

	/* Ensure we have a valid gk20a struct before proceeding */
	if ((g == NULL) || (gk20a_get(g) == NULL)) {
		return -ENODEV;
	}

	l = nvgpu_os_linux_from_gk20a(g);
	ecc_reporting_linux = &l->ecc_reporting_linux;

	nvgpu_spinlock_acquire(&ecc_reporting_linux->common.lock);
	if (param == L1SS_READY) {
		if (!ecc_reporting_linux->common.ecc_reporting_service_enabled) {
			ecc_reporting_linux->common.ecc_reporting_service_enabled = true;
			ecc_reporting_linux->common.ops = &ecc_enable_report_ops;
			nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is enabled");
		}
	} else if (param == L1SS_NOT_READY) {
		if (ecc_reporting_linux->common.ecc_reporting_service_enabled) {
			ecc_reporting_linux->common.ecc_reporting_service_enabled = false;
			ecc_reporting_linux->common.ops = &default_disabled_ecc_report_ops;
			nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is disabled");
		}
	} else {
		err = -EINVAL;
	}
	nvgpu_spinlock_release(&ecc_reporting_linux->common.lock);

	gk20a_put(g);

	return err;
}

void nvgpu_init_ecc_reporting(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
	int err = 0;

	nvgpu_spinlock_init(&ecc_report_linux->common.lock);
	ecc_report_linux->priv.id = (NVGUARD_GROUPID_IGPU & NVGUARD_GROUPINDEX_FIELDMASK);
	ecc_report_linux->priv.cli_callback = nvgpu_l1ss_callback;
	ecc_report_linux->priv.data = g;
	ecc_report_linux->common.ops = &default_disabled_ecc_report_ops;

	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting Init");

	/*
	 * l1ss_register_client() invokes the registration API:
	 * err == 0 indicates the service is available but not active yet,
	 * err == 1 indicates the service is available and active,
	 * any other value indicates an error.
	 */
	err = l1ss_register_client(&ecc_report_linux->priv);
	if (err == 0) {
		ecc_report_linux->common.ecc_reporting_service_enabled = false;
		nvgpu_log(g, gpu_dbg_ecc, "ECC reporting init success");
	} else if (err == 1) {
		ecc_report_linux->common.ecc_reporting_service_enabled = true;
		/* The actual ops will be installed by nvgpu_enable_ecc_reporting(),
		 * called as part of gk20a_busy().
		 */
	} else {
		nvgpu_log(g, gpu_dbg_ecc, "ECC reporting init failure %d", err);
	}
}

void nvgpu_deinit_ecc_reporting(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;

	if (ecc_report_linux->common.ecc_reporting_service_enabled) {
		ecc_report_linux->common.ecc_reporting_service_enabled = false;
		l1ss_deregister_client(ecc_report_linux->priv.id);
		memset(ecc_report_linux, 0, sizeof(*ecc_report_linux));
		nvgpu_log(g, gpu_dbg_ecc, "ECC reporting de-init success");
	}
}

void nvgpu_enable_ecc_reporting(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
	struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common;

	nvgpu_spinlock_acquire(&ecc_report_linux->common.lock);
	if (error_reporting->ecc_reporting_service_enabled) {
		error_reporting->ops = &ecc_enable_report_ops;
		nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is enabled");
	}
	nvgpu_spinlock_release(&ecc_report_linux->common.lock);
}

void nvgpu_disable_ecc_reporting(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
	struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common;

	nvgpu_spinlock_acquire(&ecc_report_linux->common.lock);
	error_reporting->ops = &default_disabled_ecc_report_ops;
	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is disabled");
	nvgpu_spinlock_release(&ecc_report_linux->common.lock);
}

void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_id, u64 err_addr, u64 err_count)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
	struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common;
	void (*report_ecc_err_func)(struct gk20a *g, u32 hw_unit, u32 inst,
			u32 err_id, u64 err_addr, u64 err_count);

	nvgpu_spinlock_acquire(&ecc_report_linux->common.lock);
	report_ecc_err_func = error_reporting->ops->report_ecc_err;
	nvgpu_spinlock_release(&ecc_report_linux->common.lock);

	report_ecc_err_func(g, hw_unit, inst, err_id, err_addr, err_count);
}
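
For context, a minimal caller-side sketch of how the dispatcher above is exercised: a hardware-unit handler passes its module ID, instance, error ID, address, and count to nvgpu_report_ecc_err(), which forwards them through whichever ops table is currently installed. The wrapper function name and the concrete argument values below are illustrative only; the module and error IDs are the ones defined in this file.

#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_err.h>

/* Hypothetical helper: forward an uncorrected SM LRF ECC event. */
static void example_report_sm_lrf_uncorrected(struct gk20a *g, u32 inst,
		u64 fault_addr, u64 count)
{
	/*
	 * Dispatches through the currently installed nvgpu_ecc_reporting_ops:
	 * a no-op while L1SS is not ready, nvgpu_report_ecc_error_linux()
	 * once the service has been enabled.
	 */
	nvgpu_report_ecc_err(g, (u32)NVGPU_ERR_MODULE_SM, inst,
			GPU_SM_LRF_ECC_UNCORRECTED, fault_addr, count);
}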