gpu: nvgpu: rework ecc structure and sysfs

- Create common file common/ecc.c, which includes common functions for adding and removing ECC counters. - Common code creates a list of all counters, which makes it easier to iterate over all counters. - Add chip-specific file for adding ECC counters. - Add Linux-specific file os/linux/ecc_sysfs.c to export counters to sysfs. - Remove obsolete code. - The MISRA violation for using snprintf is not solved; it is tracked in Jira NVGPU-859. Jira NVGPUT-115 Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4 Signed-off-by: Richard Zhao <rizhao@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1763536 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Richard Zhao <rizhao@nvidia.com> 2018-06-26 20:37:40 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-07-19 19:43:58 -0400
commit: 7f14aafc2c02eb0fab458324d0ba91a7fdea3086 (patch)
tree: cda9f48839fbde3444fde521a9b0069eb06cd81a /drivers/gpu/nvgpu/common/ecc.c
parent: 5ff1b3fe5a30c926e59a55ad25dd4daf430c8579 (diff)
1 files changed, 369 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c
new file mode 100644
index 00000000..b850f09e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/ecc.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "gk20a/gk20a.h"
+/*
+ * Register one ECC counter with the device: initialise the counter's list
+ * node, append it to the tail of g->ecc.stats_list and bump
+ * g->ecc.stats_count.
+ */
+static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat)
+{
+        nvgpu_init_list_node(&stat->node);
+        nvgpu_list_add_tail(&stat->node, &g->ecc.stats_list);
+        g->ecc.stats_count++;
+}
+/*
+ * Prepare the head of the per-device ECC counter list so that counters can
+ * be appended to it with nvgpu_ecc_stat_add().
+ */
+static void nvgpu_ecc_init(struct gk20a *g)
+{
+        nvgpu_init_list_node(&g->ecc.stats_list);
+}
+/*
+ * Allocate and register one ECC counter per TPC of every GPC.
+ *
+ * A table of gr->gpc_count row pointers is allocated first; row "gpc" then
+ * gets gr->gpc_tpc_count[gpc] counters. Each counter is named
+ * "gpc<G>_tpc<T>_<name>" (bounded by NVGPU_ECC_STAT_NAME_MAX_SIZE) and is
+ * added to the device's counter list.
+ *
+ * @g:    device the counters belong to
+ * @stat: on success receives the row table; left untouched on failure
+ * @name: suffix appended to every counter name
+ *
+ * Returns 0 on success or -ENOMEM if any allocation fails, in which case
+ * everything allocated by this call has already been released.
+ */
+int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
+                struct nvgpu_ecc_stat ***stat, const char *name)
+{
+        struct gr_gk20a *gr = &g->gr;
+        struct nvgpu_ecc_stat **stats;
+        u32 gpc, tpc;
+        stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
+        if (stats == NULL) {
+                return -ENOMEM;
+        }
+        for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+                stats[gpc] = nvgpu_kzalloc(g,
+                                sizeof(*stats[gpc]) * gr->gpc_tpc_count[gpc]);
+                if (stats[gpc] == NULL) {
+                        /* Unwind the rows allocated so far, newest first. */
+                        while (gpc-- != 0u) {
+                                nvgpu_kfree(g, stats[gpc]);
+                        }
+                        nvgpu_kfree(g, stats);
+                        return -ENOMEM;
+                }
+        }
+        /*
+         * Counters are only registered once every allocation has succeeded,
+         * so a failure above never leaves dangling nodes on the list.
+         */
+        for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+                for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
+                        /* gpc and tpc are u32, hence %u rather than %d. */
+                        snprintf(stats[gpc][tpc].name,
+                                        NVGPU_ECC_STAT_NAME_MAX_SIZE,
+                                        "gpc%u_tpc%u_%s", gpc, tpc, name);
+                        nvgpu_ecc_stat_add(g, &stats[gpc][tpc]);
+                }
+        }
+        *stat = stats;
+        return 0;
+}
+/*
+ * Allocate and register one ECC counter per GPC.
+ *
+ * An array of gr->gpc_count counters is allocated; entry "gpc" is named
+ * "gpc<G>_<name>" (bounded by NVGPU_ECC_STAT_NAME_MAX_SIZE) and added to the
+ * device's counter list.
+ *
+ * @g:    device the counters belong to
+ * @stat: on success receives the counter array; left untouched on failure
+ * @name: suffix appended to every counter name
+ *
+ * Returns 0 on success or -ENOMEM if the array cannot be allocated.
+ */
+int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
+                struct nvgpu_ecc_stat **stat, const char *name)
+{
+        struct gr_gk20a *gr = &g->gr;
+        struct nvgpu_ecc_stat *stats;
+        u32 gpc;
+        stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
+        if (stats == NULL) {
+                return -ENOMEM;
+        }
+        for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+                /* gpc is u32, hence %u rather than %d. */
+                snprintf(stats[gpc].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
+                                "gpc%u_%s", gpc, name);
+                nvgpu_ecc_stat_add(g, &stats[gpc]);
+        }
+        *stat = stats;
+        return 0;
+}
+/*
+ * Allocate and register a single, device-wide ECC counter called @name.
+ *
+ * @g:    device the counter belongs to
+ * @stat: on success receives the new counter; left untouched on failure
+ * @name: counter name; at most NVGPU_ECC_STAT_NAME_MAX_SIZE - 1 bytes of it
+ *        are copied
+ *
+ * Returns 0 on success or -ENOMEM if the counter cannot be allocated.
+ */
+int nvgpu_ecc_counter_init(struct gk20a *g,
+                struct nvgpu_ecc_stat **stat, const char *name)
+{
+        struct nvgpu_ecc_stat *counter = nvgpu_kzalloc(g, sizeof(*counter));
+        if (counter == NULL) {
+                return -ENOMEM;
+        }
+        /*
+         * The copy stops one byte short of the maximum name size.
+         * NOTE(review): termination relies on nvgpu_kzalloc() zeroing the
+         * buffer and on name[] being NVGPU_ECC_STAT_NAME_MAX_SIZE bytes -
+         * confirm against the struct definition.
+         */
+        (void)strncpy(counter->name, name, NVGPU_ECC_STAT_NAME_MAX_SIZE - 1);
+        nvgpu_ecc_stat_add(g, counter);
+        *stat = counter;
+        return 0;
+}
+/*
+ * Allocate and register one ECC counter per LTS (slice) of every LTC.
+ *
+ * A table of g->ltc_count row pointers is allocated first; every row then
+ * gets gr->slices_per_ltc counters. Each counter is named
+ * "ltc<C>_lts<S>_<name>" (bounded by NVGPU_ECC_STAT_NAME_MAX_SIZE) and is
+ * added to the device's counter list.
+ *
+ * @g:    device the counters belong to
+ * @stat: on success receives the row table; left untouched on failure
+ * @name: suffix appended to every counter name
+ *
+ * Returns 0 on success or -ENOMEM if any allocation fails, in which case
+ * everything allocated by this call has already been released.
+ */
+int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
+                struct nvgpu_ecc_stat ***stat, const char *name)
+{
+        struct gr_gk20a *gr = &g->gr;
+        struct nvgpu_ecc_stat **stats;
+        u32 ltc, lts;
+        stats = nvgpu_kzalloc(g, sizeof(*stats) * g->ltc_count);
+        if (stats == NULL) {
+                return -ENOMEM;
+        }
+        for (ltc = 0; ltc < g->ltc_count; ltc++) {
+                stats[ltc] = nvgpu_kzalloc(g,
+                                sizeof(*stats[ltc]) * gr->slices_per_ltc);
+                if (stats[ltc] == NULL) {
+                        /* Unwind the rows allocated so far, newest first. */
+                        while (ltc-- != 0u) {
+                                nvgpu_kfree(g, stats[ltc]);
+                        }
+                        nvgpu_kfree(g, stats);
+                        return -ENOMEM;
+                }
+        }
+        /*
+         * Counters are only registered once every allocation has succeeded,
+         * so a failure above never leaves dangling nodes on the list.
+         */
+        for (ltc = 0; ltc < g->ltc_count; ltc++) {
+                for (lts = 0; lts < gr->slices_per_ltc; lts++) {
+                        /* ltc and lts are u32, hence %u rather than %d. */
+                        snprintf(stats[ltc][lts].name,
+                                        NVGPU_ECC_STAT_NAME_MAX_SIZE,
+                                        "ltc%u_lts%u_%s", ltc, lts, name);
+                        nvgpu_ecc_stat_add(g, &stats[ltc][lts]);
+                }
+        }
+        *stat = stats;
+        return 0;
+}
+/*
+ * Allocate and register one ECC counter per FBPA.
+ *
+ * The number of FBPAs is read from the GPU_LIT_NUM_FBPAS litter value.
+ * Entry "i" is named "fbpa<i>_<name>" (bounded by
+ * NVGPU_ECC_STAT_NAME_MAX_SIZE) and added to the device's counter list.
+ *
+ * @g:    device the counters belong to
+ * @stat: on success receives the counter array; left untouched on failure
+ * @name: suffix appended to every counter name
+ *
+ * Returns 0 on success or -ENOMEM if the array cannot be allocated.
+ */
+int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
+                struct nvgpu_ecc_stat **stat, const char *name)
+{
+        int fbpa_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
+        struct nvgpu_ecc_stat *counters;
+        int fbpa;
+        counters = nvgpu_kzalloc(g, sizeof(*counters) * fbpa_count);
+        if (counters == NULL) {
+                return -ENOMEM;
+        }
+        for (fbpa = 0; fbpa < fbpa_count; fbpa++) {
+                snprintf(counters[fbpa].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
+                                "fbpa%d_%s", fbpa, name);
+                nvgpu_ecc_stat_add(g, &counters[fbpa]);
+        }
+        *stat = counters;
+        return 0;
+}
+/*
+ * Free a two-level counter table: every row first (only when the table
+ * itself exists), then the table of row pointers.
+ */
+static void nvgpu_ecc_free_table(struct gk20a *g,
+                struct nvgpu_ecc_stat **table, u32 rows)
+{
+        u32 row;
+        if (table != NULL) {
+                for (row = 0; row < rows; row++) {
+                        nvgpu_kfree(g, table[row]);
+                }
+        }
+        nvgpu_kfree(g, table);
+}
+/*
+ * Release every ECC counter held in g->ecc and reset the structure.
+ *
+ * Two-level tables (per-GPC rows for GR, per-LTC rows for LTC) are freed row
+ * by row before the table itself; flat counters are freed directly. The
+ * whole of g->ecc is zeroed afterwards, which also clears the counter list
+ * head and stats_count.
+ */
+void nvgpu_ecc_free(struct gk20a *g)
+{
+        struct nvgpu_ecc *ecc = &g->ecc;
+        u32 gpcs = g->gr.gpc_count;
+        u32 ltcs = g->ltc_count;
+        /* GR counters with one row per GPC. */
+        nvgpu_ecc_free_table(g, ecc->gr.sm_lrf_ecc_single_err_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.sm_lrf_ecc_double_err_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.sm_shm_ecc_sec_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.sm_shm_ecc_sed_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.sm_shm_ecc_ded_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.tex_ecc_total_sec_pipe0_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.tex_ecc_total_ded_pipe0_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.tex_unique_ecc_sec_pipe0_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.tex_unique_ecc_ded_pipe0_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.tex_ecc_total_sec_pipe1_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.tex_ecc_total_ded_pipe1_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.tex_unique_ecc_sec_pipe1_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.tex_unique_ecc_ded_pipe1_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.sm_cbu_ecc_corrected_err_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.sm_l1_data_ecc_corrected_err_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.sm_icache_ecc_corrected_err_count, gpcs);
+        nvgpu_ecc_free_table(g, ecc->gr.sm_icache_ecc_uncorrected_err_count, gpcs);
+        /* Flat GR counters. */
+        nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_corrected_err_count);
+        nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_uncorrected_err_count);
+        nvgpu_kfree(g, ecc->gr.gpccs_ecc_corrected_err_count);
+        nvgpu_kfree(g, ecc->gr.gpccs_ecc_uncorrected_err_count);
+        nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_corrected_err_count);
+        nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_uncorrected_err_count);
+        nvgpu_kfree(g, ecc->gr.fecs_ecc_corrected_err_count);
+        nvgpu_kfree(g, ecc->gr.fecs_ecc_uncorrected_err_count);
+        /* LTC counters with one row per LTC. */
+        nvgpu_ecc_free_table(g, ecc->ltc.ecc_sec_count, ltcs);
+        nvgpu_ecc_free_table(g, ecc->ltc.ecc_ded_count, ltcs);
+        /* Flat FB, PMU and FBPA counters. */
+        nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_corrected_err_count);
+        nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_uncorrected_err_count);
+        nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_corrected_err_count);
+        nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_uncorrected_err_count);
+        nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_corrected_err_count);
+        nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_uncorrected_err_count);
+        nvgpu_kfree(g, ecc->pmu.pmu_ecc_corrected_err_count);
+        nvgpu_kfree(g, ecc->pmu.pmu_ecc_uncorrected_err_count);
+        nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_sec_err_count);
+        nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_ded_err_count);
+        /* Leave no stale pointers behind. */
+        (void)memset(ecc, 0, sizeof(*ecc));
+}
+/*
+ * Set up ECC counter support for a device.
+ *
+ * Does nothing and returns 0 when the chip provides no g->ops.gr.init_ecc
+ * hook. Otherwise the counter list is initialised, the chip hook allocates
+ * the counters, and they are exported through nvgpu_ecc_sysfs_init().
+ *
+ * If either step fails, nvgpu_ecc_free() releases whatever counters were
+ * allocated so a partially completed init_ecc does not leak them, and the
+ * error is returned. nvgpu_ecc_free() skips pointers that are still NULL.
+ * NOTE(review): assumes the chip hook does not free counters itself without
+ * clearing the pointers in g->ecc - confirm against the chip init_ecc code.
+ *
+ * Returns 0 on success or the error code of the failing step.
+ */
+int nvgpu_ecc_init_support(struct gk20a *g)
+{
+        int err;
+        if (g->ops.gr.init_ecc == NULL) {
+                return 0;
+        }
+        nvgpu_ecc_init(g);
+        err = g->ops.gr.init_ecc(g);
+        if (err == 0) {
+                err = nvgpu_ecc_sysfs_init(g);
+        }
+        if (err != 0) {
+                nvgpu_ecc_free(g);
+        }
+        return err;
+}
+/*
+ * Tear down ECC counter support: remove the sysfs entries, then free all
+ * counters. Does nothing when the chip provides no g->ops.gr.init_ecc hook.
+ */
+void nvgpu_ecc_remove_support(struct gk20a *g)
+{
+        if (g->ops.gr.init_ecc != NULL) {
+                nvgpu_ecc_sysfs_remove(g);
+                nvgpu_ecc_free(g);
+        }
+}
author	Richard Zhao <rizhao@nvidia.com>	2018-06-26 20:37:40 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-07-19 19:43:58 -0400
commit	7f14aafc2c02eb0fab458324d0ba91a7fdea3086 (patch)
tree	cda9f48839fbde3444fde521a9b0069eb06cd81a /drivers/gpu/nvgpu/common/ecc.c
parent	5ff1b3fe5a30c926e59a55ad25dd4daf430c8579 (diff)

diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c new file mode 100644 index 00000000..b850f09e --- /dev/null +++ b/drivers/gpu/nvgpu/common/ecc.c
@@ -0,0 +1,369 @@
	1	/*
	2	* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
	3	*
	4	* Permission is hereby granted, free of charge, to any person obtaining a
	5	* copy of this software and associated documentation files (the "Software"),
	6	* to deal in the Software without restriction, including without limitation
	7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	8	* and/or sell copies of the Software, and to permit persons to whom the
	9	* Software is furnished to do so, subject to the following conditions:
	10	*
	11	* The above copyright notice and this permission notice shall be included in
	12	* all copies or substantial portions of the Software.
	13	*
	14	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	15	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	16	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	17	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	18	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	19	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
	20	* DEALINGS IN THE SOFTWARE.
	21	*/
	22
	23	#include "gk20a/gk20a.h"
	24
	25	static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat)
	26	{
	27	struct nvgpu_ecc *ecc = &g->ecc;
	28
	29	nvgpu_init_list_node(&stat->node);
	30
	31	nvgpu_list_add_tail(&stat->node, &ecc->stats_list);
	32	ecc->stats_count++;
	33	}
	34
	35	static void nvgpu_ecc_init(struct gk20a *g)
	36	{
	37	struct nvgpu_ecc *ecc = &g->ecc;
	38
	39	nvgpu_init_list_node(&ecc->stats_list);
	40	}
	41
	42	int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
	43	struct nvgpu_ecc_stat ***stat, const char *name)
	44	{
	45	struct gr_gk20a *gr = &g->gr;
	46	struct nvgpu_ecc_stat **stats;
	47	u32 gpc, tpc;
	48	int err = 0;
	49
	50	stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
	51	if (stats == NULL) {
	52	return -ENOMEM;
	53	}
	54	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
	55	stats[gpc] = nvgpu_kzalloc(g,
	56	sizeof(*stats[gpc]) * gr->gpc_tpc_count[gpc]);
	57	if (stats[gpc] == NULL) {
	58	err = -ENOMEM;
	59	break;
	60	}
	61	}
	62
	63	if (err != 0) {
	64	while (gpc-- != 0u) {
	65	nvgpu_kfree(g, stats[gpc]);
	66	}
	67
	68	nvgpu_kfree(g, stats);
	69	return err;
	70	}
	71
	72	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
	73	for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) {
	74	snprintf(stats[gpc][tpc].name,
	75	NVGPU_ECC_STAT_NAME_MAX_SIZE,
	76	"gpc%d_tpc%d_%s", gpc, tpc, name);
	77	nvgpu_ecc_stat_add(g, &stats[gpc][tpc]);
	78	}
	79	}
	80
	81	*stat = stats;
	82	return 0;
	83	}
	84
	85	int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
	86	struct nvgpu_ecc_stat **stat, const char *name)
	87	{
	88	struct gr_gk20a *gr = &g->gr;
	89	struct nvgpu_ecc_stat *stats;
	90	u32 gpc;
	91
	92	stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count);
	93	if (stats == NULL) {
	94	return -ENOMEM;
	95	}
	96	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
	97	snprintf(stats[gpc].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
	98	"gpc%d_%s", gpc, name);
	99	nvgpu_ecc_stat_add(g, &stats[gpc]);
	100	}
	101
	102	*stat = stats;
	103	return 0;
	104	}
	105
	106	int nvgpu_ecc_counter_init(struct gk20a *g,
	107	struct nvgpu_ecc_stat **stat, const char *name)
	108	{
	109	struct nvgpu_ecc_stat *stats;
	110
	111	stats = nvgpu_kzalloc(g, sizeof(*stats));
	112	if (stats == NULL) {
	113	return -ENOMEM;
	114	}
	115
	116	(void)strncpy(stats->name, name, NVGPU_ECC_STAT_NAME_MAX_SIZE - 1);
	117	nvgpu_ecc_stat_add(g, stats);
	118	*stat = stats;
	119	return 0;
	120	}
	121
	122	int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
	123	struct nvgpu_ecc_stat ***stat, const char *name)
	124	{
	125	struct gr_gk20a *gr = &g->gr;
	126	struct nvgpu_ecc_stat **stats;
	127	u32 ltc, lts;
	128	int err = 0;
	129
	130	stats = nvgpu_kzalloc(g, sizeof(*stats) * g->ltc_count);
	131	if (stats == NULL) {
	132	return -ENOMEM;
	133	}
	134	for (ltc = 0; ltc < g->ltc_count; ltc++) {
	135	stats[ltc] = nvgpu_kzalloc(g,
	136	sizeof(*stats[ltc]) * gr->slices_per_ltc);
	137	if (stats[ltc] == NULL) {
	138	err = -ENOMEM;
	139	break;
	140	}
	141	}
	142
	143	if (err != 0) {
	144	while (ltc-- > 0u) {
	145	nvgpu_kfree(g, stats[ltc]);
	146	}
	147
	148	nvgpu_kfree(g, stats);
	149	return err;
	150	}
	151
	152	for (ltc = 0; ltc < g->ltc_count; ltc++) {
	153	for (lts = 0; lts < gr->slices_per_ltc; lts++) {
	154	snprintf(stats[ltc][lts].name,
	155	NVGPU_ECC_STAT_NAME_MAX_SIZE,
	156	"ltc%d_lts%d_%s", ltc, lts, name);
	157	nvgpu_ecc_stat_add(g, &stats[ltc][lts]);
	158	}
	159	}
	160
	161	*stat = stats;
	162	return 0;
	163	}
	164
	165	int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
	166	struct nvgpu_ecc_stat **stat, const char *name)
	167	{
	168	int i;
	169	int num_fbpa = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
	170	struct nvgpu_ecc_stat *stats;
	171
	172	stats = nvgpu_kzalloc(g, sizeof(*stats) * num_fbpa);
	173	if (stats == NULL) {
	174	return -ENOMEM;
	175	}
	176
	177	for (i = 0; i < num_fbpa; i++) {
	178	snprintf(stats[i].name, NVGPU_ECC_STAT_NAME_MAX_SIZE,
	179	"fbpa%d_%s", i, name);
	180	nvgpu_ecc_stat_add(g, &stats[i]);
	181	}
	182
	183	*stat = stats;
	184	return 0;
	185	}
	186
	187	/* release all ecc_stat */
	188	void nvgpu_ecc_free(struct gk20a *g)
	189	{
	190	struct nvgpu_ecc *ecc = &g->ecc;
	191	struct gr_gk20a *gr = &g->gr;
	192	u32 i;
	193
	194	for (i = 0; i < gr->gpc_count; i++) {
	195	if (ecc->gr.sm_lrf_ecc_single_err_count != NULL) {
	196	nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count[i]);
	197	}
	198
	199	if (ecc->gr.sm_lrf_ecc_double_err_count != NULL) {
	200	nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count[i]);
	201	}
	202
	203	if (ecc->gr.sm_shm_ecc_sec_count != NULL) {
	204	nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count[i]);
	205	}
	206
	207	if (ecc->gr.sm_shm_ecc_sed_count != NULL) {
	208	nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count[i]);
	209	}
	210
	211	if (ecc->gr.sm_shm_ecc_ded_count != NULL) {
	212	nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count[i]);
	213	}
	214
	215	if (ecc->gr.tex_ecc_total_sec_pipe0_count != NULL) {
	216	nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count[i]);
	217	}
	218
	219	if (ecc->gr.tex_ecc_total_ded_pipe0_count != NULL) {
	220	nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count[i]);
	221	}
	222
	223	if (ecc->gr.tex_unique_ecc_sec_pipe0_count != NULL) {
	224	nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count[i]);
	225	}
	226
	227	if (ecc->gr.tex_unique_ecc_ded_pipe0_count != NULL) {
	228	nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count[i]);
	229	}
	230
	231	if (ecc->gr.tex_ecc_total_sec_pipe1_count != NULL) {
	232	nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count[i]);
	233	}
	234
	235	if (ecc->gr.tex_ecc_total_ded_pipe1_count != NULL) {
	236	nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count[i]);
	237	}
	238
	239	if (ecc->gr.tex_unique_ecc_sec_pipe1_count != NULL) {
	240	nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count[i]);
	241	}
	242
	243	if (ecc->gr.tex_unique_ecc_ded_pipe1_count != NULL) {
	244	nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count[i]);
	245	}
	246
	247	if (ecc->gr.sm_l1_tag_ecc_corrected_err_count != NULL) {
	248	nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count[i]);
	249	}
	250
	251	if (ecc->gr.sm_l1_tag_ecc_uncorrected_err_count != NULL) {
	252	nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count[i]);
	253	}
	254
	255	if (ecc->gr.sm_cbu_ecc_corrected_err_count != NULL) {
	256	nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count[i]);
	257	}
	258
	259	if (ecc->gr.sm_cbu_ecc_uncorrected_err_count != NULL) {
	260	nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count[i]);
	261	}
	262
	263	if (ecc->gr.sm_l1_data_ecc_corrected_err_count != NULL) {
	264	nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count[i]);
	265	}
	266
	267	if (ecc->gr.sm_l1_data_ecc_uncorrected_err_count != NULL) {
	268	nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count[i]);
	269	}
	270
	271	if (ecc->gr.sm_icache_ecc_corrected_err_count != NULL) {
	272	nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count[i]);
	273	}
	274
	275	if (ecc->gr.sm_icache_ecc_uncorrected_err_count != NULL) {
	276	nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count[i]);
	277	}
	278	}
	279	nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count);
	280	nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count);
	281	nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count);
	282	nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count);
	283	nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count);
	284	nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count);
	285	nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count);
	286	nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count);
	287	nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count);
	288	nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count);
	289	nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count);
	290	nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count);
	291	nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count);
	292	nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count);
	293	nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count);
	294	nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count);
	295	nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count);
	296	nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count);
	297	nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count);
	298	nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count);
	299	nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count);
	300
	301	nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_corrected_err_count);
	302	nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_uncorrected_err_count);
	303	nvgpu_kfree(g, ecc->gr.gpccs_ecc_corrected_err_count);
	304	nvgpu_kfree(g, ecc->gr.gpccs_ecc_uncorrected_err_count);
	305	nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_corrected_err_count);
	306	nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_uncorrected_err_count);
	307	nvgpu_kfree(g, ecc->gr.fecs_ecc_corrected_err_count);
	308	nvgpu_kfree(g, ecc->gr.fecs_ecc_uncorrected_err_count);
	309
	310	for (i = 0; i < g->ltc_count; i++) {
	311	if (ecc->ltc.ecc_sec_count != NULL) {
	312	nvgpu_kfree(g, ecc->ltc.ecc_sec_count[i]);
	313	}
	314
	315	if (ecc->ltc.ecc_ded_count != NULL) {
	316	nvgpu_kfree(g, ecc->ltc.ecc_ded_count[i]);
	317	}
	318	}
	319	nvgpu_kfree(g, ecc->ltc.ecc_sec_count);
	320	nvgpu_kfree(g, ecc->ltc.ecc_ded_count);
	321
	322	nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_corrected_err_count);
	323	nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_uncorrected_err_count);
	324	nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_corrected_err_count);
	325	nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_uncorrected_err_count);
	326	nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_corrected_err_count);
	327	nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_uncorrected_err_count);
	328
	329	nvgpu_kfree(g, ecc->pmu.pmu_ecc_corrected_err_count);
	330	nvgpu_kfree(g, ecc->pmu.pmu_ecc_uncorrected_err_count);
	331
	332	nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_sec_err_count);
	333	nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_ded_err_count);
	334
	335	(void)memset(ecc, 0, sizeof(*ecc));
	336	}
	337
	338	int nvgpu_ecc_init_support(struct gk20a *g)
	339	{
	340	int err;
	341
	342	if (g->ops.gr.init_ecc == NULL) {
	343	return 0;
	344	}
	345
	346	nvgpu_ecc_init(g);
	347	err = g->ops.gr.init_ecc(g);
	348	if (err != 0) {
	349	return err;
	350	}
	351
	352	err = nvgpu_ecc_sysfs_init(g);
	353	if (err != 0) {
	354	nvgpu_ecc_free(g);
	355	return err;
	356	}
	357
	358	return 0;
	359	}
	360
	361	void nvgpu_ecc_remove_support(struct gk20a *g)
	362	{
	363	if (g->ops.gr.init_ecc == NULL) {
	364	return;
	365	}
	366
	367	nvgpu_ecc_sysfs_remove(g);
	368	nvgpu_ecc_free(g);
	369	}