4 files changed, 416 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/ecc_linux.h b/drivers/gpu/nvgpu/os/linux/ecc_linux.h
new file mode 100644
index 00000000..7e0f650b
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/ecc_linux.h
@@ -0,0 +1,49 @@
+/*
+ *
+ * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NVGPU_OS_ECC_LINUX_H
+#define NVGPU_OS_ECC_LINUX_H
+#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
+#include <linux/tegra_l1ss_kernel_interface.h>
+#include <linux/tegra_l1ss_ioctl.h>
+#include <linux/tegra_nv_guard_service_id.h>
+#include <linux/tegra_nv_guard_group_id.h>
+#include <nvgpu/nvgpu_err.h>
+/*
+ * Linux-side ECC reporting state: the nvgpu_ecc_reporting object bundled
+ * with the parameters handed to the L1SS client registration API.
+ */
+struct nvgpu_ecc_reporting_linux {
+    struct nvgpu_ecc_reporting common; /* embedded member; see get_ecc_reporting_linux() */
+    client_param_t priv; /* id, callback and data given to l1ss_register_client() */
+};
+/*
+ * Map a pointer to the embedded 'common' member back to the enclosing
+ * struct nvgpu_ecc_reporting_linux.
+ *
+ * ecc_report must point at the 'common' field of a
+ * struct nvgpu_ecc_reporting_linux; container_of() performs no checking.
+ */
+static inline struct nvgpu_ecc_reporting_linux *get_ecc_reporting_linux(
+    struct nvgpu_ecc_reporting *ecc_report)
+{
+        return container_of(ecc_report, struct nvgpu_ecc_reporting_linux, common);
+}
+#endif /* CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING */
+#endif
+\ No newline at end of file
diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c
index 807df2ca..fdbab46d 100644
--- a/drivers/gpu/nvgpu/os/linux/module.c
+++ b/drivers/gpu/nvgpu/os/linux/module.c
@@ -1,7 +1,7 @@
 /*
 * GK20A Graphics
 *
- * Copyright (c) 2011-2020, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -49,6 +49,7 @@
 #include <nvgpu/clk_arb.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/channel.h>
+#include <nvgpu/nvgpu_err.h>
 #include "platform_gk20a.h"
 #include "sysfs.h"
@@ -355,6 +356,10 @@ int gk20a_pm_finalize_poweron(struct device *dev)
                gk20a_init_cde_support(l);
 #endif
+#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
+        nvgpu_enable_ecc_reporting(g);
+#endif
        err = gk20a_sched_ctrl_init(g);
        if (err) {
                nvgpu_err(g, "failed to init sched control");
@@ -364,9 +369,14 @@ int gk20a_pm_finalize_poweron(struct device *dev)
        g->sw_ready = true;
 done:
-        if (err)
+        if (err) {
                g->power_on = false;
+#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
+                nvgpu_disable_ecc_reporting(g);
+#endif
+        }
        nvgpu_mutex_release(&g->power_lock);
        return err;
 }
@@ -433,6 +443,10 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
        /* Stop CPU from accessing the GPU registers. */
        gk20a_lockout_registers(g);
+#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
+        nvgpu_disable_ecc_reporting(g);
+#endif
        nvgpu_hide_usermode_for_poweroff(g);
        nvgpu_mutex_release(&g->power_lock);
        return 0;
@@ -1382,6 +1396,10 @@ static int gk20a_probe(struct platform_device *dev)
                goto return_err;
        }
+#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
+        nvgpu_init_ecc_reporting(gk20a);
+#endif
        gk20a->nvgpu_reboot_nb.notifier_call =
                nvgpu_kernel_shutdown_notification;
        err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb);
diff --git a/drivers/gpu/nvgpu/os/linux/os_linux.h b/drivers/gpu/nvgpu/os/linux/os_linux.h
index 25c6c03a..adcfdb2f 100644
--- a/drivers/gpu/nvgpu/os/linux/os_linux.h
+++ b/drivers/gpu/nvgpu/os/linux/os_linux.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -25,6 +25,7 @@
 #include "cde.h"
 #include "sched.h"
+#include "ecc_linux.h"
 struct nvgpu_os_linux_ops {
        struct {
@@ -134,6 +135,10 @@ struct nvgpu_os_linux {
        u64 regs_bus_addr;
+#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
+        struct nvgpu_ecc_reporting_linux ecc_reporting_linux;
+#endif
        struct nvgpu_os_linux_ops ops;
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/gpu/nvgpu/os/linux/sdl.c b/drivers/gpu/nvgpu/os/linux/sdl.c
new file mode 100644
index 00000000..c4dccdc6
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/sdl.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <nvgpu/gk20a.h>
+#include <nvgpu/types.h>
+#include <nvgpu/nvgpu_err.h>
+#include <nvgpu/timers.h>
+#include <nvgpu/bug.h>
+#include "ecc_linux.h"
+#include "os_linux.h"
+#include "module.h"
+/*
+ * This look-up table initializes the list of hw units and their errors.
+ * It also specifies the error injection mechanism supported, for each error.
+ * In case of hw error injection support, this initialization will be overridden
+ * by the values provided from the hal layers of corresponding hw units.
+ *
+ * nvgpu_report_ecc_error_linux() indexes this array directly with hw_unit and
+ * indexes errs[] with err_id (bounded by num_errs); the service id it reports
+ * is base_ecc_service_id + err_id, so the order of the entries is significant.
+ */
+static struct nvgpu_err_hw_module gv11b_err_lut[] = {
+        {
+                .name = "sm",
+                .hw_unit = (u32)NVGPU_ERR_MODULE_SM,
+                .num_errs = 21U,
+                .base_ecc_service_id =
+                        NVGUARD_SERVICE_IGPU_SM_SWERR_L1_TAG_ECC_CORRECTED,
+                .errs = (struct nvgpu_err_desc[]) {
+                        GPU_NONCRITERR("l1_tag_ecc_corrected",
+                                        GPU_SM_L1_TAG_ECC_CORRECTED, 0, 0),
+                        GPU_CRITERR("l1_tag_ecc_uncorrected",
+                                        GPU_SM_L1_TAG_ECC_UNCORRECTED, 0, 0),
+                        GPU_NONCRITERR("cbu_ecc_corrected", 0, 0, 0),
+                        GPU_CRITERR("cbu_ecc_uncorrected",
+                                        GPU_SM_CBU_ECC_UNCORRECTED, 0, 0),
+                        GPU_NONCRITERR("lrf_ecc_corrected", 0, 0, 0),
+                        GPU_CRITERR("lrf_ecc_uncorrected",
+                                        GPU_SM_LRF_ECC_UNCORRECTED, 0, 0),
+                        GPU_NONCRITERR("l1_data_ecc_corrected", 0, 0, 0),
+                        GPU_CRITERR("l1_data_ecc_uncorrected",
+                                        GPU_SM_L1_DATA_ECC_UNCORRECTED, 0, 0),
+                        GPU_NONCRITERR("icache_l0_data_ecc_corrected", 0, 0, 0),
+                        GPU_CRITERR("icache_l0_data_ecc_uncorrected",
+                                        GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED, 0, 0),
+                        GPU_NONCRITERR("icache_l1_data_ecc_corrected", 0, 0, 0),
+                        GPU_CRITERR("icache_l1_data_ecc_uncorrected",
+                                        GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED, 0, 0),
+                        GPU_NONCRITERR("icache_l0_predecode_ecc_corrected", 0, 0, 0),
+                        GPU_CRITERR("icache_l0_predecode_ecc_uncorrected",
+                                        GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED, 0, 0),
+                        GPU_NONCRITERR("l1_tag_miss_fifo_ecc_corrected", 0, 0, 0),
+                        GPU_CRITERR("l1_tag_miss_fifo_ecc_uncorrected",
+                                        GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED, 0, 0),
+                        GPU_NONCRITERR("l1_tag_s2r_pixprf_ecc_corrected", 0, 0, 0),
+                        GPU_CRITERR("l1_tag_s2r_pixprf_ecc_uncorrected",
+                                        GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED, 0, 0),
+                        GPU_CRITERR("machine_check_error", 0, 0, 0),
+                        GPU_NONCRITERR("icache_l1_predecode_ecc_corrected", 0, 0, 0),
+                        GPU_CRITERR("icache_l1_predecode_ecc_uncorrected",
+                                        GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED, 0, 0),
+                },
+        },
+        {
+                .name = "fecs",
+                .hw_unit = (u32)NVGPU_ERR_MODULE_FECS,
+                .num_errs = 4U,
+                .base_ecc_service_id =
+                        NVGUARD_SERVICE_IGPU_FECS_SWERR_FALCON_IMEM_ECC_CORRECTED,
+                .errs = (struct nvgpu_err_desc[]) {
+                        GPU_NONCRITERR("falcon_imem_ecc_corrected",
+                                        GPU_FECS_FALCON_IMEM_ECC_CORRECTED, 0, 0),
+                        GPU_CRITERR("falcon_imem_ecc_uncorrected",
+                                        GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED, 0, 0),
+                        GPU_NONCRITERR("falcon_dmem_ecc_corrected", 0, 0, 0),
+                        GPU_CRITERR("falcon_dmem_ecc_uncorrected",
+                                        GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED, 0, 0),
+                },
+        },
+        {
+                .name = "pmu",
+                .hw_unit = NVGPU_ERR_MODULE_PMU,
+                .num_errs = 4U,
+                .base_ecc_service_id =
+                        NVGUARD_SERVICE_IGPU_PMU_SWERR_FALCON_IMEM_ECC_CORRECTED,
+                .errs = (struct nvgpu_err_desc[]) {
+                        GPU_NONCRITERR("falcon_imem_ecc_corrected",
+                                        GPU_PMU_FALCON_IMEM_ECC_CORRECTED, 0, 0),
+                        GPU_CRITERR("falcon_imem_ecc_uncorrected",
+                                        GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED, 0, 0),
+                        GPU_NONCRITERR("falcon_dmem_ecc_corrected", 0, 0, 0),
+                        GPU_CRITERR("falcon_dmem_ecc_uncorrected",
+                                        GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED, 0, 0),
+                },
+        },
+};
+/*
+ * Reset an error message header to its defaults: message format version 1.0
+ * and every other field zero.
+ */
+static void nvgpu_init_err_msg_header(struct gpu_err_header *header)
+{
+        /* Identification / payload fields start out cleared. */
+        header->timestamp_ns = 0UL;
+        header->address = 0UL;
+        header->sub_unit_id = 0UL;
+        header->sub_err_type = 0U;
+        /* Header format version: major 1, minor 0. */
+        header->version.minor = (u16)0U;
+        header->version.major = (u16)1U;
+}
+/*
+ * Prepare an ECC error packet for use: zero the error count and put the
+ * embedded header into its default state.
+ */
+static void nvgpu_init_ecc_err_msg(struct gpu_ecc_error_info *err_info)
+{
+        err_info->err_cnt = 0UL;
+        nvgpu_init_err_msg_header(&err_info->header);
+}
+/*
+ * Report one ECC error to the L1SS service.
+ *
+ * @g:         GPU the error was raised on.
+ * @hw_unit:   index of the reporting hw unit in gv11b_err_lut[].
+ * @inst:      sub-unit instance, stored in the packet header's sub_unit_id.
+ * @err_id:    index of the error in the unit's errs[] array; also the offset
+ *             added to the unit's base_ecc_service_id.
+ * @err_addr:  address associated with the error.
+ * @err_count: error count placed in the packet.
+ *
+ * Out-of-range hw_unit/err_id values are logged and dropped. A notification is
+ * only submitted when the per-error occurrence counter reaches
+ * err_threshold + 1, at which point the counter is reset to 0. Critical
+ * errors additionally quiesce the GPU after the submission attempt.
+ */
+static void nvgpu_report_ecc_error_linux(struct gk20a *g, u32 hw_unit, u32 inst,
+                u32 err_id, u64 err_addr, u64 err_count)
+{
+        int err = 0;
+        u32 s_id = 0;
+        u8 err_status = 0;
+        u8 err_info_size = 0;
+        u64 timestamp = 0ULL;
+        int err_threshold_counter = 0;
+        struct gpu_ecc_error_info err_pkt;
+        struct nvgpu_err_desc *err_desc = NULL;
+        struct nvgpu_err_hw_module *hw_module = NULL;
+        nv_guard_request_t req;
+        memset(&req, 0, sizeof(req));
+        /*
+         * The packet is copied byte-for-byte into the request below, so clear
+         * it completely first: padding and any field the init helper does not
+         * touch must not carry stack garbage.
+         */
+        memset(&err_pkt, 0, sizeof(err_pkt));
+        nvgpu_init_ecc_err_msg(&err_pkt);
+        if (hw_unit >= sizeof(gv11b_err_lut)/sizeof(gv11b_err_lut[0])) {
+                nvgpu_err(g, "invalid hw module (%u)", hw_unit);
+                return;
+        }
+        hw_module = &gv11b_err_lut[hw_unit];
+        if (err_id >= hw_module->num_errs) {
+                nvgpu_err(g, "invalid err_id (%u) for hw module (%u)",
+                        err_id, hw_module->hw_unit);
+                return;
+        }
+        err_desc = &hw_module->errs[err_id];
+        timestamp = (u64)nvgpu_current_time_ns();
+        err_pkt.header.timestamp_ns = timestamp;
+        err_pkt.header.sub_unit_id = inst;
+        err_pkt.header.address = err_addr;
+        err_pkt.err_cnt = err_count;
+        err_info_size = sizeof(err_pkt);
+        s_id = hw_module->base_ecc_service_id + err_id;
+        if (err_desc->is_critical) {
+                err_status = NVGUARD_ERROR_DETECTED;
+        } else {
+                err_status = NVGUARD_NO_ERROR;
+        }
+        /*
+         * Count this occurrence, then atomically swap the counter back to 0
+         * if it has just reached err_threshold + 1. Any other value means the
+         * threshold has not been hit yet and nothing is reported.
+         */
+        nvgpu_atomic_inc(&err_desc->err_count);
+        err_threshold_counter = nvgpu_atomic_cmpxchg(&err_desc->err_count,
+                        err_desc->err_threshold + 1, 0);
+        if (unlikely(err_threshold_counter != err_desc->err_threshold + 1)) {
+                return;
+        }
+        nvgpu_log(g, gpu_dbg_ecc, "ECC reporting hw: %s, desc:%s, count:%llu",
+                hw_module->name, err_desc->name, err_count);
+        req.srv_id_cmd = NVGUARD_SERVICESTATUS_NOTIFICATION;
+        req.srv_status.srv_id = (nv_guard_service_id_t)s_id;
+        req.srv_status.status = err_status;
+        req.srv_status.timestamp = timestamp;
+        req.srv_status.error_info_size = err_info_size;
+        memcpy(req.srv_status.error_info, (u8*)&err_pkt, err_info_size);
+        /*
+         * l1ss_submit_rq may fail due to kmalloc failures but may pass in
+         * subsequent calls
+         */
+        err = l1ss_submit_rq(&req, true);
+        if (err != 0) {
+                nvgpu_err(g, "Error returned from L1SS submit %d", err);
+        }
+        /* Critical errors quiesce the GPU whether or not the submit worked. */
+        if (err_desc->is_critical) {
+                nvgpu_quiesce(g);
+        }
+}
+/*
+ * Reporter used while ECC reporting is switched off: the error details are
+ * ignored and only a debug message is logged.
+ */
+static void nvgpu_report_ecc_error_empty(struct gk20a *g, u32 hw_unit, u32 inst,
+                u32 err_id, u64 err_addr, u64 err_count)
+{
+        nvgpu_log(g, gpu_dbg_ecc, "ECC reporting empty");
+}
+/* Ops installed while reporting is disabled: errors are only debug-logged. */
+const struct nvgpu_ecc_reporting_ops default_disabled_ecc_report_ops = {
+        .report_ecc_err = nvgpu_report_ecc_error_empty,
+};
+/* Ops installed while reporting is enabled: errors are submitted to L1SS. */
+const struct nvgpu_ecc_reporting_ops ecc_enable_report_ops = {
+        .report_ecc_err = nvgpu_report_ecc_error_linux,
+};
+/*
+ * L1SS client callback, registered by nvgpu_init_ecc_reporting().
+ *
+ * @param: L1SS_READY or L1SS_NOT_READY.
+ * @data:  the struct gk20a pointer stored in the client parameters.
+ *
+ * Switches the active reporting ops to match the service state. A state that
+ * is already in effect is left alone.
+ *
+ * Returns 0 on success, -ENODEV if no GPU reference could be taken and
+ * -EINVAL for any other value of @param.
+ */
+static int nvgpu_l1ss_callback(l1ss_cli_callback_param param, void *data)
+{
+        struct gk20a *g = (struct gk20a *)data;
+        struct nvgpu_os_linux *l = NULL;
+        struct nvgpu_ecc_reporting *reporting = NULL;
+        bool service_ready;
+        bool service_not_ready;
+        int ret = 0;
+        /* Ensure we have a valid gk20a struct before proceeding */
+        if (g == NULL) {
+                return -ENODEV;
+        }
+        if (gk20a_get(g) == NULL) {
+                return -ENODEV;
+        }
+        l = nvgpu_os_linux_from_gk20a(g);
+        reporting = &l->ecc_reporting_linux.common;
+        service_ready = (param == L1SS_READY);
+        service_not_ready = (param == L1SS_NOT_READY);
+        nvgpu_spinlock_acquire(&reporting->lock);
+        if (!service_ready && !service_not_ready) {
+                ret = -EINVAL;
+        } else if (service_ready && !reporting->ecc_reporting_service_enabled) {
+                reporting->ecc_reporting_service_enabled = true;
+                reporting->ops = &ecc_enable_report_ops;
+                nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is enabled");
+        } else if (!service_ready && reporting->ecc_reporting_service_enabled) {
+                reporting->ecc_reporting_service_enabled = false;
+                reporting->ops = &default_disabled_ecc_report_ops;
+                nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is disabled");
+        }
+        nvgpu_spinlock_release(&reporting->lock);
+        /* Drop the reference taken by gk20a_get() above. */
+        gk20a_put(g);
+        return ret;
+}
+/*
+ * Set up ECC error reporting for @g and register with L1SS.
+ *
+ * The reporting state is put into a complete "disabled" configuration
+ * (lock initialised, flag cleared, no-op ops installed) before the client is
+ * registered, because nvgpu_l1ss_callback() may start updating that state as
+ * soon as registration is in progress. After registration the state is only
+ * modified under the lock, so a concurrent callback update is never
+ * overwritten.
+ */
+void nvgpu_init_ecc_reporting(struct gk20a *g)
+{
+        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+        struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
+        struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common;
+        int err = 0;
+        nvgpu_spinlock_init(&error_reporting->lock);
+        error_reporting->ecc_reporting_service_enabled = false;
+        error_reporting->ops = &default_disabled_ecc_report_ops;
+        /* Client parameters handed to the registration API below. */
+        ecc_report_linux->priv.id = (NVGUARD_GROUPID_IGPU & NVGUARD_GROUPINDEX_FIELDMASK);
+        ecc_report_linux->priv.cli_callback = nvgpu_l1ss_callback;
+        ecc_report_linux->priv.data = g;
+        nvgpu_log(g, gpu_dbg_ecc, "ECC reporting Init");
+        /*
+         * err == 0 indicates service is available but not active yet.
+         * err == 1 indicates service is available and active
+         * error for other cases.
+         */
+        err = l1ss_register_client(&ecc_report_linux->priv);
+        if (err == 0) {
+                /*
+                 * The flag was cleared before registering. It is deliberately
+                 * not written again here, since the callback may already have
+                 * reported L1SS_READY.
+                 */
+                nvgpu_log(g, gpu_dbg_ecc, "ECC reporting init success");
+        } else if (err == 1) {
+                /*
+                 * Only the flag is set here; the reporting ops are installed
+                 * later by nvgpu_enable_ecc_reporting().
+                 */
+                nvgpu_spinlock_acquire(&error_reporting->lock);
+                error_reporting->ecc_reporting_service_enabled = true;
+                nvgpu_spinlock_release(&error_reporting->lock);
+        } else {
+                nvgpu_log(g, gpu_dbg_ecc, "ECC reporting init failure %d", err);
+        }
+}
+/*
+ * Tear down ECC error reporting for @g.
+ *
+ * Nothing is done unless the reporting service is currently marked enabled.
+ * Otherwise the L1SS client is deregistered, reporting is switched back to
+ * the no-op ops and the client parameters are cleared.
+ *
+ * The lock and the ops pointer are intentionally left valid (not zeroed) so
+ * that a late nvgpu_report_ecc_err() call lands in the no-op reporter instead
+ * of dereferencing a NULL ops pointer.
+ *
+ * NOTE(review): a client that registered successfully but never saw
+ * L1SS_READY is not deregistered here, because the enabled flag is the only
+ * state available to gate on - confirm whether that case needs handling.
+ */
+void nvgpu_deinit_ecc_reporting(struct gk20a *g)
+{
+        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+        struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
+        struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common;
+        if (!error_reporting->ecc_reporting_service_enabled) {
+                return;
+        }
+        /*
+         * Deregister first so the state reset below is final.
+         * NOTE(review): assumes L1SS makes no further callbacks once
+         * deregistration has returned - confirm.
+         */
+        l1ss_deregister_client(ecc_report_linux->priv.id);
+        nvgpu_spinlock_acquire(&error_reporting->lock);
+        error_reporting->ecc_reporting_service_enabled = false;
+        error_reporting->ops = &default_disabled_ecc_report_ops;
+        nvgpu_spinlock_release(&error_reporting->lock);
+        memset(&ecc_report_linux->priv, 0, sizeof(ecc_report_linux->priv));
+        nvgpu_log(g, gpu_dbg_ecc, "ECC reporting de-init success");
+}
+/*
+ * Install the real ECC reporting ops for @g, provided the reporting service
+ * is currently marked as enabled; otherwise leave the ops untouched.
+ */
+void nvgpu_enable_ecc_reporting(struct gk20a *g)
+{
+        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+        struct nvgpu_ecc_reporting *reporting = &l->ecc_reporting_linux.common;
+        nvgpu_spinlock_acquire(&reporting->lock);
+        if (reporting->ecc_reporting_service_enabled) {
+                reporting->ops = &ecc_enable_report_ops;
+                nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is enabled");
+        }
+        nvgpu_spinlock_release(&reporting->lock);
+}
+/*
+ * Unconditionally switch @g back to the no-op ECC reporting ops. The
+ * service-enabled flag is not modified.
+ */
+void nvgpu_disable_ecc_reporting(struct gk20a *g)
+{
+        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+        struct nvgpu_ecc_reporting *reporting = &l->ecc_reporting_linux.common;
+        nvgpu_spinlock_acquire(&reporting->lock);
+        reporting->ops = &default_disabled_ecc_report_ops;
+        nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is disabled");
+        nvgpu_spinlock_release(&reporting->lock);
+}
+/*
+ * Entry point for reporting an ECC error: forwards all arguments to whichever
+ * reporter is currently installed in the ops table.
+ *
+ * The handler pointer is sampled while holding the lock, but the handler
+ * itself is invoked after the lock has been released.
+ */
+void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst,
+                u32 err_id, u64 err_addr, u64 err_count)
+{
+        struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+        struct nvgpu_ecc_reporting *reporting = &l->ecc_reporting_linux.common;
+        void (*handler)(struct gk20a *g, u32 hw_unit, u32 inst,
+                u32 err_id, u64 err_addr, u64 err_count);
+        nvgpu_spinlock_acquire(&reporting->lock);
+        handler = reporting->ops->report_ecc_err;
+        nvgpu_spinlock_release(&reporting->lock);
+        handler(g, hw_unit, inst, err_id, err_addr, err_count);
+}

diff --git a/drivers/gpu/nvgpu/os/linux/ecc_linux.h b/drivers/gpu/nvgpu/os/linux/ecc_linux.h new file mode 100644 index 00000000..7e0f650b --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/ecc_linux.h
@@ -0,0 +1,49 @@
		1	/*
		2	*
		3	* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
		4	*
		5	* Permission is hereby granted, free of charge, to any person obtaining a
		6	* copy of this software and associated documentation files (the "Software"),
		7	* to deal in the Software without restriction, including without limitation
		8	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		9	* and/or sell copies of the Software, and to permit persons to whom the
		10	* Software is furnished to do so, subject to the following conditions:
		11	*
		12	* The above copyright notice and this permission notice shall be included in
		13	* all copies or substantial portions of the Software.
		14	*
		15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
		20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
		21	* DEALINGS IN THE SOFTWARE.
		22	*/
		23
		24	#ifndef NVGPU_OS_ECC_LINUX_H
		25	#define NVGPU_OS_ECC_LINUX_H
		26
		27	#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
		28
		29	#include <linux/tegra_l1ss_kernel_interface.h>
		30	#include <linux/tegra_l1ss_ioctl.h>
		31	#include <linux/tegra_nv_guard_service_id.h>
		32	#include <linux/tegra_nv_guard_group_id.h>
		33
		34	#include <nvgpu/nvgpu_err.h>
		35
		36	struct nvgpu_ecc_reporting_linux {
		37	struct nvgpu_ecc_reporting common;
		38	client_param_t priv;
		39	};
		40
		41	static inline struct nvgpu_ecc_reporting_linux *get_ecc_reporting_linux(
		42	struct nvgpu_ecc_reporting *ecc_report)
		43	{
		44	return container_of(ecc_report, struct nvgpu_ecc_reporting_linux, common);
		45	}
		46
		47	#endif /* CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING */
		48
		49	#endif \ No newline at end of file


diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index 807df2ca..fdbab46d 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c
@@ -1,7 +1,7 @@
1	/*	1	/*
2	* GK20A Graphics	2	* GK20A Graphics
3	*	3	*
4	* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.	4	* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
5	*	5	*
6	* This program is free software; you can redistribute it and/or modify it	6	* This program is free software; you can redistribute it and/or modify it
7	* under the terms and conditions of the GNU General Public License,	7	* under the terms and conditions of the GNU General Public License,
@@ -49,6 +49,7 @@
49	#include <nvgpu/clk_arb.h>	49	#include <nvgpu/clk_arb.h>
50	#include <nvgpu/timers.h>	50	#include <nvgpu/timers.h>
51	#include <nvgpu/channel.h>	51	#include <nvgpu/channel.h>
		52	#include <nvgpu/nvgpu_err.h>
52		53
53	#include "platform_gk20a.h"	54	#include "platform_gk20a.h"
54	#include "sysfs.h"	55	#include "sysfs.h"
@@ -355,6 +356,10 @@ int gk20a_pm_finalize_poweron(struct device *dev)
355	gk20a_init_cde_support(l);	356	gk20a_init_cde_support(l);
356	#endif	357	#endif
357		358
		359	#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
		360	nvgpu_enable_ecc_reporting(g);
		361	#endif
		362
358	err = gk20a_sched_ctrl_init(g);	363	err = gk20a_sched_ctrl_init(g);
359	if (err) {	364	if (err) {
360	nvgpu_err(g, "failed to init sched control");	365	nvgpu_err(g, "failed to init sched control");
@@ -364,9 +369,14 @@ int gk20a_pm_finalize_poweron(struct device *dev)
364	g->sw_ready = true;	369	g->sw_ready = true;
365		370
366	done:	371	done:
367	if (err)	372	if (err) {
368	g->power_on = false;	373	g->power_on = false;
369		374
		375	#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
		376	nvgpu_disable_ecc_reporting(g);
		377	#endif
		378	}
		379
370	nvgpu_mutex_release(&g->power_lock);	380	nvgpu_mutex_release(&g->power_lock);
371	return err;	381	return err;
372	}	382	}
@@ -433,6 +443,10 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
433	/* Stop CPU from accessing the GPU registers. */	443	/* Stop CPU from accessing the GPU registers. */
434	gk20a_lockout_registers(g);	444	gk20a_lockout_registers(g);
435		445
		446	#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
		447	nvgpu_disable_ecc_reporting(g);
		448	#endif
		449
436	nvgpu_hide_usermode_for_poweroff(g);	450	nvgpu_hide_usermode_for_poweroff(g);
437	nvgpu_mutex_release(&g->power_lock);	451	nvgpu_mutex_release(&g->power_lock);
438	return 0;	452	return 0;
@@ -1382,6 +1396,10 @@ static int gk20a_probe(struct platform_device *dev)
1382	goto return_err;	1396	goto return_err;
1383	}	1397	}
1384		1398
		1399	#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
		1400	nvgpu_init_ecc_reporting(gk20a);
		1401	#endif
		1402
1385	gk20a->nvgpu_reboot_nb.notifier_call =	1403	gk20a->nvgpu_reboot_nb.notifier_call =
1386	nvgpu_kernel_shutdown_notification;	1404	nvgpu_kernel_shutdown_notification;
1387	err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb);	1405	err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb);


diff --git a/drivers/gpu/nvgpu/os/linux/os_linux.h b/drivers/gpu/nvgpu/os/linux/os_linux.h index 25c6c03a..adcfdb2f 100644 --- a/drivers/gpu/nvgpu/os/linux/os_linux.h +++ b/drivers/gpu/nvgpu/os/linux/os_linux.h
@@ -1,5 +1,5 @@
1	/*	1	/*
2	* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.	2	* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
3	*	3	*
4	* This program is free software; you can redistribute it and/or modify it	4	* This program is free software; you can redistribute it and/or modify it
5	* under the terms and conditions of the GNU General Public License,	5	* under the terms and conditions of the GNU General Public License,
@@ -25,6 +25,7 @@
25		25
26	#include "cde.h"	26	#include "cde.h"
27	#include "sched.h"	27	#include "sched.h"
		28	#include "ecc_linux.h"
28		29
29	struct nvgpu_os_linux_ops {	30	struct nvgpu_os_linux_ops {
30	struct {	31	struct {
@@ -134,6 +135,10 @@ struct nvgpu_os_linux {
134		135
135	u64 regs_bus_addr;	136	u64 regs_bus_addr;
136		137
		138	#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
		139	struct nvgpu_ecc_reporting_linux ecc_reporting_linux;
		140	#endif
		141
137	struct nvgpu_os_linux_ops ops;	142	struct nvgpu_os_linux_ops ops;
138		143
139	#ifdef CONFIG_DEBUG_FS	144	#ifdef CONFIG_DEBUG_FS


diff --git a/drivers/gpu/nvgpu/os/linux/sdl.c b/drivers/gpu/nvgpu/os/linux/sdl.c new file mode 100644 index 00000000..c4dccdc6 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sdl.c
@@ -0,0 +1,341 @@
		1	/*
		2	* Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
		3	*
		4	* This program is free software; you can redistribute it and/or modify it
		5	* under the terms and conditions of the GNU General Public License,
		6	* version 2, as published by the Free Software Foundation.
		7	*
		8	* This program is distributed in the hope it will be useful, but WITHOUT
		9	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
		10	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
		11	* more details.
		12	*
		13	* You should have received a copy of the GNU General Public License
		14	* along with this program. If not, see <http://www.gnu.org/licenses/>.
		15	*/
		16
		17	#include <nvgpu/gk20a.h>
		18	#include <nvgpu/types.h>
		19	#include <nvgpu/nvgpu_err.h>
		20	#include <nvgpu/timers.h>
		21	#include <nvgpu/bug.h>
		22
		23	#include "ecc_linux.h"
		24	#include "os_linux.h"
		25	#include "module.h"
		26
		27	/* This look-up table initializes the list of hw units and their errors.
		28	* It also specifies the error injection mechanism supported, for each error.
		29	* In case of hw error injection support, this initialization will be overridden
		30	* by the values provided from the hal layers of corresponding hw units.
		31	*/
		32	static struct nvgpu_err_hw_module gv11b_err_lut[] = {
		33	{
		34	.name = "sm",
		35	.hw_unit = (u32)NVGPU_ERR_MODULE_SM,
		36	.num_errs = 21U,
		37	.base_ecc_service_id =
		38	NVGUARD_SERVICE_IGPU_SM_SWERR_L1_TAG_ECC_CORRECTED,
		39	.errs = (struct nvgpu_err_desc[]) {
		40	GPU_NONCRITERR("l1_tag_ecc_corrected",
		41	GPU_SM_L1_TAG_ECC_CORRECTED, 0, 0),
		42	GPU_CRITERR("l1_tag_ecc_uncorrected",
		43	GPU_SM_L1_TAG_ECC_UNCORRECTED, 0, 0),
		44	GPU_NONCRITERR("cbu_ecc_corrected", 0, 0, 0),
		45	GPU_CRITERR("cbu_ecc_uncorrected",
		46	GPU_SM_CBU_ECC_UNCORRECTED, 0, 0),
		47	GPU_NONCRITERR("lrf_ecc_corrected", 0, 0, 0),
		48	GPU_CRITERR("lrf_ecc_uncorrected",
		49	GPU_SM_LRF_ECC_UNCORRECTED, 0, 0),
		50	GPU_NONCRITERR("l1_data_ecc_corrected", 0, 0, 0),
		51	GPU_CRITERR("l1_data_ecc_uncorrected",
		52	GPU_SM_L1_DATA_ECC_UNCORRECTED, 0, 0),
		53	GPU_NONCRITERR("icache_l0_data_ecc_corrected", 0, 0, 0),
		54	GPU_CRITERR("icache_l0_data_ecc_uncorrected",
		55	GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED, 0, 0),
		56	GPU_NONCRITERR("icache_l1_data_ecc_corrected", 0, 0, 0),
		57	GPU_CRITERR("icache_l1_data_ecc_uncorrected",
		58	GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED, 0, 0),
		59	GPU_NONCRITERR("icache_l0_predecode_ecc_corrected", 0, 0, 0),
		60	GPU_CRITERR("icache_l0_predecode_ecc_uncorrected",
		61	GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED, 0, 0),
		62	GPU_NONCRITERR("l1_tag_miss_fifo_ecc_corrected", 0, 0, 0),
		63	GPU_CRITERR("l1_tag_miss_fifo_ecc_uncorrected",
		64	GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED, 0, 0),
		65	GPU_NONCRITERR("l1_tag_s2r_pixprf_ecc_corrected", 0, 0, 0),
		66	GPU_CRITERR("l1_tag_s2r_pixprf_ecc_uncorrected",
		67	GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED, 0, 0),
		68	GPU_CRITERR("machine_check_error", 0, 0, 0),
		69	GPU_NONCRITERR("icache_l1_predecode_ecc_corrected", 0, 0, 0),
		70	GPU_CRITERR("icache_l1_predecode_ecc_uncorrected",
		71	GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED, 0, 0),
		72	},
		73	},
		74	{
		75	.name = "fecs",
		76	.hw_unit = (u32)NVGPU_ERR_MODULE_FECS,
		77	.num_errs = 4U,
		78	.base_ecc_service_id =
		79	NVGUARD_SERVICE_IGPU_FECS_SWERR_FALCON_IMEM_ECC_CORRECTED,
		80	.errs = (struct nvgpu_err_desc[]) {
		81	GPU_NONCRITERR("falcon_imem_ecc_corrected",
		82	GPU_FECS_FALCON_IMEM_ECC_CORRECTED, 0, 0),
		83	GPU_CRITERR("falcon_imem_ecc_uncorrected",
		84	GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED, 0, 0),
		85	GPU_NONCRITERR("falcon_dmem_ecc_corrected", 0, 0, 0),
		86	GPU_CRITERR("falcon_dmem_ecc_uncorrected",
		87	GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED, 0, 0),
		88	},
		89	},
		90	{
		91	.name = "pmu",
		92	.hw_unit = NVGPU_ERR_MODULE_PMU,
		93	.num_errs = 4U,
		94	.base_ecc_service_id =
		95	NVGUARD_SERVICE_IGPU_PMU_SWERR_FALCON_IMEM_ECC_CORRECTED,
		96	.errs = (struct nvgpu_err_desc[]) {
		97	GPU_NONCRITERR("falcon_imem_ecc_corrected",
		98	GPU_PMU_FALCON_IMEM_ECC_CORRECTED, 0, 0),
		99	GPU_CRITERR("falcon_imem_ecc_uncorrected",
		100	GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED, 0, 0),
		101	GPU_NONCRITERR("falcon_dmem_ecc_corrected", 0, 0, 0),
		102	GPU_CRITERR("falcon_dmem_ecc_uncorrected",
		103	GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED, 0, 0),
		104	},
		105	},
		106	};
		107
		108	static void nvgpu_init_err_msg_header(struct gpu_err_header *header)
		109	{
		110	header->version.major = (u16)1U;
		111	header->version.minor = (u16)0U;
		112	header->sub_err_type = 0U;
		113	header->sub_unit_id = 0UL;
		114	header->address = 0UL;
		115	header->timestamp_ns = 0UL;
		116	}
		117
		118	static void nvgpu_init_ecc_err_msg(struct gpu_ecc_error_info *err_info)
		119	{
		120	nvgpu_init_err_msg_header(&err_info->header);
		121	err_info->err_cnt = 0UL;
		122	}
		123
		124	static void nvgpu_report_ecc_error_linux(struct gk20a *g, u32 hw_unit, u32 inst,
		125	u32 err_id, u64 err_addr, u64 err_count)
		126	{
		127	int err = 0;
		128	u32 s_id = 0;
		129	u8 err_status = 0;
		130	u8 err_info_size = 0;
		131	u64 timestamp = 0ULL;
		132	int err_threshold_counter = 0;
		133	struct gpu_ecc_error_info err_pkt;
		134	struct nvgpu_err_desc *err_desc = NULL;
		135	struct nvgpu_err_hw_module *hw_module = NULL;
		136	nv_guard_request_t req;
		137
		138	memset(&req, 0, sizeof(req));
		139	nvgpu_init_ecc_err_msg(&err_pkt);
		140	if (hw_unit >= sizeof(gv11b_err_lut)/sizeof(gv11b_err_lut[0])) {
		141	err = -EINVAL;
		142	goto done;
		143	}
		144
		145	hw_module = &gv11b_err_lut[hw_unit];
		146	if (err_id >= hw_module->num_errs) {
		147	nvgpu_err(g, "invalid err_id (%u) for hw module (%u)",
		148	err_id, hw_module->hw_unit);
		149	err = -EINVAL;
		150	goto done;
		151	}
		152	err_desc = &hw_module->errs[err_id];
		153	timestamp = (u64)nvgpu_current_time_ns();
		154
		155	err_pkt.header.timestamp_ns = timestamp;
		156	err_pkt.header.sub_unit_id = inst;
		157	err_pkt.header.address = err_addr;
		158	err_pkt.err_cnt = err_count;
		159	err_info_size = sizeof(err_pkt);
		160
		161	s_id = hw_module->base_ecc_service_id + err_id;
		162
		163	if (err_desc->is_critical) {
		164	err_status = NVGUARD_ERROR_DETECTED;
		165	} else {
		166	err_status = NVGUARD_NO_ERROR;
		167	}
		168
		169	nvgpu_atomic_inc(&err_desc->err_count);
		170	err_threshold_counter = nvgpu_atomic_cmpxchg(&err_desc->err_count,
		171	err_desc->err_threshold + 1, 0);
		172
		173	if (unlikely(err_threshold_counter != err_desc->err_threshold + 1)) {
		174	goto done;
		175	}
		176
		177	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting hw: %s, desc:%s, count:%llu",
		178	hw_module->name, err_desc->name, err_count);
		179
		180	req.srv_id_cmd = NVGUARD_SERVICESTATUS_NOTIFICATION;
		181	req.srv_status.srv_id = (nv_guard_service_id_t)s_id;
		182	req.srv_status.status = err_status;
		183	req.srv_status.timestamp = timestamp;
		184	req.srv_status.error_info_size = err_info_size;
		185	memcpy(req.srv_status.error_info, (u8*)&err_pkt, err_info_size);
		186
		187	/*
		188	* l1ss_submit_rq may fail due to kmalloc failures but may pass in
		189	* subsequent calls
		190	*/
		191	err = l1ss_submit_rq(&req, true);
		192	if (err != 0) {
		193	nvgpu_err(g, "Error returned from L1SS submit %d", err);
		194	}
		195
		196	if (err_desc->is_critical) {
		197	nvgpu_quiesce(g);
		198	}
		199
		200	done:
		201	return;
		202	}
		203
		204	static void nvgpu_report_ecc_error_empty(struct gk20a *g, u32 hw_unit, u32 inst,
		205	u32 err_id, u64 err_addr, u64 err_count) {
		206	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting empty");
		207	}
		208
		209	const struct nvgpu_ecc_reporting_ops default_disabled_ecc_report_ops = {
		210	.report_ecc_err = nvgpu_report_ecc_error_empty,
		211	};
		212
		213	const struct nvgpu_ecc_reporting_ops ecc_enable_report_ops = {
		214	.report_ecc_err = nvgpu_report_ecc_error_linux,
		215	};
		216
		217	static int nvgpu_l1ss_callback(l1ss_cli_callback_param param, void *data)
		218	{
		219	struct gk20a *g = (struct gk20a *)data;
		220	struct nvgpu_os_linux *l = NULL;
		221	struct nvgpu_ecc_reporting_linux *ecc_reporting_linux = NULL;
		222	int err = 0;
		223	/* Ensure we have a valid gk20a struct before proceeding */
		224	if ((g == NULL) || (gk20a_get(g) == NULL)) {
		225	return -ENODEV;
		226	}
		227
		228	l = nvgpu_os_linux_from_gk20a(g);
		229	ecc_reporting_linux = &l->ecc_reporting_linux;
		230
		231	nvgpu_spinlock_acquire(&ecc_reporting_linux->common.lock);
		232	if (param == L1SS_READY) {
		233	if (!ecc_reporting_linux->common.ecc_reporting_service_enabled) {
		234	ecc_reporting_linux->common.ecc_reporting_service_enabled = true;
		235	ecc_reporting_linux->common.ops = &ecc_enable_report_ops;
		236	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is enabled");
		237	}
		238	} else if (param == L1SS_NOT_READY) {
		239	if (ecc_reporting_linux->common.ecc_reporting_service_enabled) {
		240	ecc_reporting_linux->common.ecc_reporting_service_enabled = false;
		241	ecc_reporting_linux->common.ops = &default_disabled_ecc_report_ops;
		242	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is disabled");
		243	}
		244	} else {
		245	err = -EINVAL;
		246	}
		247	nvgpu_spinlock_release(&ecc_reporting_linux->common.lock);
		248
		249	gk20a_put(g);
		250
		251	return err;
		252	}
		253
		254	void nvgpu_init_ecc_reporting(struct gk20a *g)
		255	{
		256	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
		257	struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
		258	int err = 0;
		259	/* This will invoke the registration API */
		260	nvgpu_spinlock_init(&ecc_report_linux->common.lock);
		261	ecc_report_linux->priv.id = (NVGUARD_GROUPID_IGPU & NVGUARD_GROUPINDEX_FIELDMASK);
		262	ecc_report_linux->priv.cli_callback = nvgpu_l1ss_callback;
		263	ecc_report_linux->priv.data = g;
		264	ecc_report_linux->common.ops = &default_disabled_ecc_report_ops;
		265
		266	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting Init");
		267
		268	/*
		269	* err == 0 indicates service is available but not active yet.
		270	* err == 1 indicates service is available and active
		271	* Any other value indicates an error.
		272	*/
		273	err = l1ss_register_client(&ecc_report_linux->priv);
		274	if (err == 0) {
		275	ecc_report_linux->common.ecc_reporting_service_enabled = false;
		276	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting init success");
		277	} else if (err == 1) {
		278	ecc_report_linux->common.ecc_reporting_service_enabled = true;
		279	/* Actual Ops will be replaced during nvgpu_enable_ecc_reporting
		280	* called as part of gk20a_busy()
		281	*/
		282	} else {
		283	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting init failure %d", err);
		284	}
		285	}
		286
		287	void nvgpu_deinit_ecc_reporting(struct gk20a *g)
		288	{
		289	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
		290	struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
		291
		292	if (ecc_report_linux->common.ecc_reporting_service_enabled) {
		293	ecc_report_linux->common.ecc_reporting_service_enabled = false;
		294	l1ss_deregister_client(ecc_report_linux->priv.id);
		295	memset(ecc_report_linux, 0, sizeof(*ecc_report_linux));
		296	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting de-init success");
		297	}
		298
		299	}
		300
		301	void nvgpu_enable_ecc_reporting(struct gk20a *g)
		302	{
		303	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
		304	struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
		305	struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common;
		306
		307	nvgpu_spinlock_acquire(&ecc_report_linux->common.lock);
		308	if (error_reporting->ecc_reporting_service_enabled) {
		309	error_reporting->ops = &ecc_enable_report_ops;
		310	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is enabled");
		311	}
		312	nvgpu_spinlock_release(&ecc_report_linux->common.lock);
		313	}
		314
		315	void nvgpu_disable_ecc_reporting(struct gk20a *g)
		316	{
		317	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
		318	struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
		319	struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common;
		320
		321	nvgpu_spinlock_acquire(&ecc_report_linux->common.lock);
		322	error_reporting->ops = &default_disabled_ecc_report_ops;
		323	nvgpu_log(g, gpu_dbg_ecc, "ECC reporting is disabled");
		324	nvgpu_spinlock_release(&ecc_report_linux->common.lock);
		325	}
		326
		327	void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst,
		328	u32 err_id, u64 err_addr, u64 err_count)
		329	{
		330	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
		331	struct nvgpu_ecc_reporting_linux *ecc_report_linux = &l->ecc_reporting_linux;
		332	struct nvgpu_ecc_reporting *error_reporting = &ecc_report_linux->common;
		333	void (*report_ecc_err_func)(struct gk20a *g, u32 hw_unit, u32 inst,
		334	u32 err_id, u64 err_addr, u64 err_count);
		335
		336	nvgpu_spinlock_acquire(&ecc_report_linux->common.lock);
		337	report_ecc_err_func = error_reporting->ops->report_ecc_err;
		338	nvgpu_spinlock_release(&ecc_report_linux->common.lock);
		339
		340	report_ecc_err_func(g, hw_unit, inst, err_id, err_addr, err_count);
		341	}