From 7f14aafc2c02eb0fab458324d0ba91a7fdea3086 Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Tue, 26 Jun 2018 17:37:40 -0700 Subject: gpu: nvgpu: rework ecc structure and sysfs - create common file common/ecc.c which includes common functions for adding and removing ecc counters. - common code will create a list of all counters, which makes it easier to iterate over all counters. - add chip-specific files for adding ecc counters. - add linux-specific file os/linux/ecc_sysfs.c to export counters to sysfs. - remove obsolete code - MISRA violation for using snprintf is not solved; tracked with jira NVGPU-859 Jira NVGPUT-115 Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4 Signed-off-by: Richard Zhao Reviewed-on: https://git-master.nvidia.com/r/1763536 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 7 +- drivers/gpu/nvgpu/Makefile.sources | 3 + drivers/gpu/nvgpu/common/ecc.c | 369 ++++++++++++++++++++++ drivers/gpu/nvgpu/common/fb/fb_gv11b.c | 24 +- drivers/gpu/nvgpu/common/posix/stubs.c | 11 + drivers/gpu/nvgpu/gk20a/ecc_gk20a.h | 102 ------ drivers/gpu/nvgpu/gk20a/gk20a.h | 8 +- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 8 +- drivers/gpu/nvgpu/gp106/hal_gp106.c | 4 - drivers/gpu/nvgpu/gp10b/ecc_gp10b.c | 106 +++++++ drivers/gpu/nvgpu/gp10b/ecc_gp10b.h | 28 ++ drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 26 +- drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 6 +- drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 4 +- drivers/gpu/nvgpu/gv100/hal_gv100.c | 4 - drivers/gpu/nvgpu/gv11b/ecc_gv11b.c | 181 +++++++++++ drivers/gpu/nvgpu/gv11b/ecc_gv11b.h | 28 ++ drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 48 +-- drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 6 +- drivers/gpu/nvgpu/gv11b/ltc_gv11b.c | 10 +- drivers/gpu/nvgpu/gv11b/pmu_gv11b.c | 8 +- drivers/gpu/nvgpu/include/nvgpu/ecc.h | 162 ++++++++++ drivers/gpu/nvgpu/os/linux/ecc_sysfs.c | 80 +++++ drivers/gpu/nvgpu/os/linux/os_linux.h | 1 + drivers/gpu/nvgpu/os/linux/pci.c | 5 - drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c | 
269 ---------------- drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h | 37 --- drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c | 165 ---------- drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h | 1 - drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c | 331 ------------------- drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | 4 - drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | 4 - 32 files changed, 1044 insertions(+), 1006 deletions(-) create mode 100644 drivers/gpu/nvgpu/common/ecc.c delete mode 100644 drivers/gpu/nvgpu/gk20a/ecc_gk20a.h create mode 100644 drivers/gpu/nvgpu/gp10b/ecc_gp10b.c create mode 100644 drivers/gpu/nvgpu/gp10b/ecc_gp10b.h create mode 100644 drivers/gpu/nvgpu/gv11b/ecc_gv11b.c create mode 100644 drivers/gpu/nvgpu/gv11b/ecc_gv11b.h create mode 100644 drivers/gpu/nvgpu/include/nvgpu/ecc.h create mode 100644 drivers/gpu/nvgpu/os/linux/ecc_sysfs.c delete mode 100644 drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c delete mode 100644 drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 72795e08..90858e55 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -69,7 +69,8 @@ nvgpu-y += \ os/linux/sim_pci.o \ os/linux/os_sched.o \ os/linux/nvlink.o \ - os/linux/dt.o + os/linux/dt.o \ + os/linux/ecc_sysfs.o nvgpu-$(CONFIG_GK20A_VIDMEM) += \ os/linux/dmabuf_vidmem.o @@ -100,7 +101,6 @@ nvgpu-$(CONFIG_TEGRA_GK20A) += \ os/linux/module_usermode.o \ os/linux/soc.o \ os/linux/fuse.o \ - os/linux/platform_ecc_sysfs.o \ os/linux/platform_gk20a_tegra.o \ os/linux/platform_gp10b_tegra.o \ os/linux/platform_gv11b_tegra.o @@ -185,6 +185,7 @@ nvgpu-y += \ common/sim.o \ common/sim_pci.o \ common/fifo/submit.o \ + common/ecc.o \ gk20a/gk20a.o \ gk20a/ce2_gk20a.o \ gk20a/fifo_gk20a.o \ @@ -267,6 +268,7 @@ nvgpu-y += \ gp10b/priv_ring_gp10b.o \ gp10b/gp10b.o \ gp10b/fuse_gp10b.o \ + gp10b/ecc_gp10b.o \ gp106/hal_gp106.o \ gp106/mm_gp106.o \ gp106/flcn_gp106.o \ @@ -296,6 +298,7 
@@ nvgpu-y += \ gv11b/subctx_gv11b.o \ gv11b/regops_gv11b.o \ gv11b/therm_gv11b.o \ + gv11b/ecc_gv11b.o \ gv100/mm_gv100.o \ gv100/gr_ctx_gv100.o \ gv100/bios_gv100.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 55d7201c..8095f6ba 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -49,6 +49,7 @@ srcs := common/mm/nvgpu_allocator.c \ common/rbtree.c \ common/ltc.c \ common/io_common.c \ + common/ecc.c \ common/vbios/bios.c \ common/falcon/falcon.c \ common/pmu/pmu.c \ @@ -166,6 +167,7 @@ srcs := common/mm/nvgpu_allocator.c \ gp10b/priv_ring_gp10b.c \ gp10b/gp10b.c \ gp10b/fuse_gp10b.c \ + gp10b/ecc_gp10b.c \ gv11b/gv11b.c \ gv11b/dbg_gpu_gv11b.c \ gv11b/mc_gv11b.c \ @@ -181,6 +183,7 @@ srcs := common/mm/nvgpu_allocator.c \ gv11b/subctx_gv11b.c \ gv11b/regops_gv11b.c \ gv11b/therm_gv11b.c \ + gv11b/ecc_gv11b.c \ gp106/hal_gp106.c \ gp106/mm_gp106.c \ gp106/flcn_gp106.c \ diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c new file mode 100644 index 00000000..b850f09e --- /dev/null +++ b/drivers/gpu/nvgpu/common/ecc.c @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "gk20a/gk20a.h" + +static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat) +{ + struct nvgpu_ecc *ecc = &g->ecc; + + nvgpu_init_list_node(&stat->node); + + nvgpu_list_add_tail(&stat->node, &ecc->stats_list); + ecc->stats_count++; +} + +static void nvgpu_ecc_init(struct gk20a *g) +{ + struct nvgpu_ecc *ecc = &g->ecc; + + nvgpu_init_list_node(&ecc->stats_list); +} + +int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g, + struct nvgpu_ecc_stat ***stat, const char *name) +{ + struct gr_gk20a *gr = &g->gr; + struct nvgpu_ecc_stat **stats; + u32 gpc, tpc; + int err = 0; + + stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count); + if (stats == NULL) { + return -ENOMEM; + } + for (gpc = 0; gpc < gr->gpc_count; gpc++) { + stats[gpc] = nvgpu_kzalloc(g, + sizeof(*stats[gpc]) * gr->gpc_tpc_count[gpc]); + if (stats[gpc] == NULL) { + err = -ENOMEM; + break; + } + } + + if (err != 0) { + while (gpc-- != 0u) { + nvgpu_kfree(g, stats[gpc]); + } + + nvgpu_kfree(g, stats); + return err; + } + + for (gpc = 0; gpc < gr->gpc_count; gpc++) { + for (tpc = 0; tpc < gr->gpc_tpc_count[gpc]; tpc++) { + snprintf(stats[gpc][tpc].name, + NVGPU_ECC_STAT_NAME_MAX_SIZE, + "gpc%d_tpc%d_%s", gpc, tpc, name); + nvgpu_ecc_stat_add(g, &stats[gpc][tpc]); + } + } + + *stat = stats; + return 0; +} + +int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g, + struct nvgpu_ecc_stat **stat, const char *name) +{ + struct gr_gk20a *gr = &g->gr; + struct nvgpu_ecc_stat *stats; + u32 gpc; + + 
stats = nvgpu_kzalloc(g, sizeof(*stats) * gr->gpc_count); + if (stats == NULL) { + return -ENOMEM; + } + for (gpc = 0; gpc < gr->gpc_count; gpc++) { + snprintf(stats[gpc].name, NVGPU_ECC_STAT_NAME_MAX_SIZE, + "gpc%d_%s", gpc, name); + nvgpu_ecc_stat_add(g, &stats[gpc]); + } + + *stat = stats; + return 0; +} + +int nvgpu_ecc_counter_init(struct gk20a *g, + struct nvgpu_ecc_stat **stat, const char *name) +{ + struct nvgpu_ecc_stat *stats; + + stats = nvgpu_kzalloc(g, sizeof(*stats)); + if (stats == NULL) { + return -ENOMEM; + } + + (void)strncpy(stats->name, name, NVGPU_ECC_STAT_NAME_MAX_SIZE - 1); + nvgpu_ecc_stat_add(g, stats); + *stat = stats; + return 0; +} + +int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, + struct nvgpu_ecc_stat ***stat, const char *name) +{ + struct gr_gk20a *gr = &g->gr; + struct nvgpu_ecc_stat **stats; + u32 ltc, lts; + int err = 0; + + stats = nvgpu_kzalloc(g, sizeof(*stats) * g->ltc_count); + if (stats == NULL) { + return -ENOMEM; + } + for (ltc = 0; ltc < g->ltc_count; ltc++) { + stats[ltc] = nvgpu_kzalloc(g, + sizeof(*stats[ltc]) * gr->slices_per_ltc); + if (stats[ltc] == NULL) { + err = -ENOMEM; + break; + } + } + + if (err != 0) { + while (ltc-- > 0u) { + nvgpu_kfree(g, stats[ltc]); + } + + nvgpu_kfree(g, stats); + return err; + } + + for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (lts = 0; lts < gr->slices_per_ltc; lts++) { + snprintf(stats[ltc][lts].name, + NVGPU_ECC_STAT_NAME_MAX_SIZE, + "ltc%d_lts%d_%s", ltc, lts, name); + nvgpu_ecc_stat_add(g, &stats[ltc][lts]); + } + } + + *stat = stats; + return 0; +} + +int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g, + struct nvgpu_ecc_stat **stat, const char *name) +{ + int i; + int num_fbpa = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS); + struct nvgpu_ecc_stat *stats; + + stats = nvgpu_kzalloc(g, sizeof(*stats) * num_fbpa); + if (stats == NULL) { + return -ENOMEM; + } + + for (i = 0; i < num_fbpa; i++) { + snprintf(stats[i].name, NVGPU_ECC_STAT_NAME_MAX_SIZE, + "fbpa%d_%s", i, 
name); + nvgpu_ecc_stat_add(g, &stats[i]); + } + + *stat = stats; + return 0; +} + +/* release all ecc_stat */ +void nvgpu_ecc_free(struct gk20a *g) +{ + struct nvgpu_ecc *ecc = &g->ecc; + struct gr_gk20a *gr = &g->gr; + u32 i; + + for (i = 0; i < gr->gpc_count; i++) { + if (ecc->gr.sm_lrf_ecc_single_err_count != NULL) { + nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count[i]); + } + + if (ecc->gr.sm_lrf_ecc_double_err_count != NULL) { + nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count[i]); + } + + if (ecc->gr.sm_shm_ecc_sec_count != NULL) { + nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count[i]); + } + + if (ecc->gr.sm_shm_ecc_sed_count != NULL) { + nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count[i]); + } + + if (ecc->gr.sm_shm_ecc_ded_count != NULL) { + nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count[i]); + } + + if (ecc->gr.tex_ecc_total_sec_pipe0_count != NULL) { + nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count[i]); + } + + if (ecc->gr.tex_ecc_total_ded_pipe0_count != NULL) { + nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count[i]); + } + + if (ecc->gr.tex_unique_ecc_sec_pipe0_count != NULL) { + nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count[i]); + } + + if (ecc->gr.tex_unique_ecc_ded_pipe0_count != NULL) { + nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count[i]); + } + + if (ecc->gr.tex_ecc_total_sec_pipe1_count != NULL) { + nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count[i]); + } + + if (ecc->gr.tex_ecc_total_ded_pipe1_count != NULL) { + nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count[i]); + } + + if (ecc->gr.tex_unique_ecc_sec_pipe1_count != NULL) { + nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count[i]); + } + + if (ecc->gr.tex_unique_ecc_ded_pipe1_count != NULL) { + nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count[i]); + } + + if (ecc->gr.sm_l1_tag_ecc_corrected_err_count != NULL) { + nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count[i]); + } + + if (ecc->gr.sm_l1_tag_ecc_uncorrected_err_count != NULL) { + nvgpu_kfree(g, 
ecc->gr.sm_l1_tag_ecc_uncorrected_err_count[i]); + } + + if (ecc->gr.sm_cbu_ecc_corrected_err_count != NULL) { + nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count[i]); + } + + if (ecc->gr.sm_cbu_ecc_uncorrected_err_count != NULL) { + nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count[i]); + } + + if (ecc->gr.sm_l1_data_ecc_corrected_err_count != NULL) { + nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count[i]); + } + + if (ecc->gr.sm_l1_data_ecc_uncorrected_err_count != NULL) { + nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count[i]); + } + + if (ecc->gr.sm_icache_ecc_corrected_err_count != NULL) { + nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count[i]); + } + + if (ecc->gr.sm_icache_ecc_uncorrected_err_count != NULL) { + nvgpu_kfree(g, ecc->gr.sm_icache_ecc_uncorrected_err_count[i]); + } + } + nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_single_err_count); + nvgpu_kfree(g, ecc->gr.sm_lrf_ecc_double_err_count); + nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sec_count); + nvgpu_kfree(g, ecc->gr.sm_shm_ecc_sed_count); + nvgpu_kfree(g, ecc->gr.sm_shm_ecc_ded_count); + nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe0_count); + nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe0_count); + nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe0_count); + nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe0_count); + nvgpu_kfree(g, ecc->gr.tex_ecc_total_sec_pipe1_count); + nvgpu_kfree(g, ecc->gr.tex_ecc_total_ded_pipe1_count); + nvgpu_kfree(g, ecc->gr.tex_unique_ecc_sec_pipe1_count); + nvgpu_kfree(g, ecc->gr.tex_unique_ecc_ded_pipe1_count); + nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->gr.sm_l1_tag_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->gr.sm_cbu_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->gr.sm_l1_data_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->gr.sm_icache_ecc_corrected_err_count); + nvgpu_kfree(g, 
ecc->gr.sm_icache_ecc_uncorrected_err_count); + + nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->gr.gcc_l15_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->gr.gpccs_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->gr.gpccs_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->gr.mmu_l1tlb_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->gr.fecs_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->gr.fecs_ecc_uncorrected_err_count); + + for (i = 0; i < g->ltc_count; i++) { + if (ecc->ltc.ecc_sec_count != NULL) { + nvgpu_kfree(g, ecc->ltc.ecc_sec_count[i]); + } + + if (ecc->ltc.ecc_ded_count != NULL) { + nvgpu_kfree(g, ecc->ltc.ecc_ded_count[i]); + } + } + nvgpu_kfree(g, ecc->ltc.ecc_sec_count); + nvgpu_kfree(g, ecc->ltc.ecc_ded_count); + + nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->fb.mmu_l2tlb_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->fb.mmu_hubtlb_ecc_uncorrected_err_count); + nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->fb.mmu_fillunit_ecc_uncorrected_err_count); + + nvgpu_kfree(g, ecc->pmu.pmu_ecc_corrected_err_count); + nvgpu_kfree(g, ecc->pmu.pmu_ecc_uncorrected_err_count); + + nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_sec_err_count); + nvgpu_kfree(g, ecc->fbpa.fbpa_ecc_ded_err_count); + + (void)memset(ecc, 0, sizeof(*ecc)); +} + +int nvgpu_ecc_init_support(struct gk20a *g) +{ + int err; + + if (g->ops.gr.init_ecc == NULL) { + return 0; + } + + nvgpu_ecc_init(g); + err = g->ops.gr.init_ecc(g); + if (err != 0) { + return err; + } + + err = nvgpu_ecc_sysfs_init(g); + if (err != 0) { + nvgpu_ecc_free(g); + return err; + } + + return 0; +} + +void nvgpu_ecc_remove_support(struct gk20a *g) +{ + if (g->ops.gr.init_ecc == NULL) { + return; + } + + nvgpu_ecc_sysfs_remove(g); + nvgpu_ecc_free(g); +} diff --git 
a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c index 26dabd72..53f04188 100644 --- a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c +++ b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c @@ -445,9 +445,9 @@ void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status) uncorrected_delta += (0x1UL << fb_mmu_l2tlb_ecc_uncorrected_err_count_total_s()); - g->ecc.fb.mmu_l2tlb_corrected_err_count.counters[0] += + g->ecc.fb.mmu_l2tlb_ecc_corrected_err_count[0].counter += corrected_delta; - g->ecc.fb.mmu_l2tlb_uncorrected_err_count.counters[0] += + g->ecc.fb.mmu_l2tlb_ecc_uncorrected_err_count[0].counter += uncorrected_delta; if (ecc_status & fb_mmu_l2tlb_ecc_status_corrected_err_l2tlb_sa_data_m()) @@ -461,8 +461,8 @@ void gv11b_handle_l2tlb_ecc_isr(struct gk20a *g, u32 ecc_status) "ecc error address: 0x%x", ecc_addr); nvgpu_log(g, gpu_dbg_intr, "ecc error count corrected: %d, uncorrected %d", - g->ecc.fb.mmu_l2tlb_corrected_err_count.counters[0], - g->ecc.fb.mmu_l2tlb_uncorrected_err_count.counters[0]); + g->ecc.fb.mmu_l2tlb_ecc_corrected_err_count[0].counter, + g->ecc.fb.mmu_l2tlb_ecc_uncorrected_err_count[0].counter); } void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status) @@ -503,9 +503,9 @@ void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status) uncorrected_delta += (0x1UL << fb_mmu_hubtlb_ecc_uncorrected_err_count_total_s()); - g->ecc.fb.mmu_hubtlb_corrected_err_count.counters[0] += + g->ecc.fb.mmu_hubtlb_ecc_corrected_err_count[0].counter += corrected_delta; - g->ecc.fb.mmu_hubtlb_uncorrected_err_count.counters[0] += + g->ecc.fb.mmu_hubtlb_ecc_uncorrected_err_count[0].counter += uncorrected_delta; if (ecc_status & fb_mmu_hubtlb_ecc_status_corrected_err_sa_data_m()) @@ -519,8 +519,8 @@ void gv11b_handle_hubtlb_ecc_isr(struct gk20a *g, u32 ecc_status) "ecc error address: 0x%x", ecc_addr); nvgpu_log(g, gpu_dbg_intr, "ecc error count corrected: %d, uncorrected %d", - g->ecc.fb.mmu_hubtlb_corrected_err_count.counters[0], - 
g->ecc.fb.mmu_hubtlb_uncorrected_err_count.counters[0]); + g->ecc.fb.mmu_hubtlb_ecc_corrected_err_count[0].counter, + g->ecc.fb.mmu_hubtlb_ecc_uncorrected_err_count[0].counter); } void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status) @@ -561,9 +561,9 @@ void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status) uncorrected_delta += (0x1UL << fb_mmu_fillunit_ecc_uncorrected_err_count_total_s()); - g->ecc.fb.mmu_fillunit_corrected_err_count.counters[0] += + g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter += corrected_delta; - g->ecc.fb.mmu_fillunit_uncorrected_err_count.counters[0] += + g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter += uncorrected_delta; if (ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pte_data_m()) @@ -582,8 +582,8 @@ void gv11b_handle_fillunit_ecc_isr(struct gk20a *g, u32 ecc_status) "ecc error address: 0x%x", ecc_addr); nvgpu_log(g, gpu_dbg_intr, "ecc error count corrected: %d, uncorrected %d", - g->ecc.fb.mmu_fillunit_corrected_err_count.counters[0], - g->ecc.fb.mmu_fillunit_uncorrected_err_count.counters[0]); + g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter, + g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter); } static void gv11b_fb_parse_mmfault(struct mmu_fault_info *mmfault) diff --git a/drivers/gpu/nvgpu/common/posix/stubs.c b/drivers/gpu/nvgpu/common/posix/stubs.c index d6270692..0fa80bff 100644 --- a/drivers/gpu/nvgpu/common/posix/stubs.c +++ b/drivers/gpu/nvgpu/common/posix/stubs.c @@ -25,8 +25,19 @@ * for an implementation. 
*/ +#include + #include "gk20a/dbg_gpu_gk20a.h" void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s) { } + +int nvgpu_ecc_sysfs_init(struct gk20a *g) +{ + return 0; +} + +void nvgpu_ecc_sysfs_remove(struct gk20a *g) +{ +} diff --git a/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h b/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h deleted file mode 100644 index 9c50a809..00000000 --- a/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * GK20A ECC - * - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ -#ifndef ECC_GK20A_H -#define ECC_GK20A_H - -struct gk20a_ecc_stat { - char **names; - u32 *counters; - u32 count; -#ifdef CONFIG_SYSFS - struct hlist_node hash_node; - struct device_attribute *attr_array; -#endif -}; - -struct ecc_gk20a { - /* Stats per engine */ - struct { - struct gk20a_ecc_stat sm_lrf_single_err_count; - struct gk20a_ecc_stat sm_lrf_double_err_count; - - struct gk20a_ecc_stat sm_shm_sec_count; - struct gk20a_ecc_stat sm_shm_sed_count; - struct gk20a_ecc_stat sm_shm_ded_count; - - struct gk20a_ecc_stat tex_total_sec_pipe0_count; - struct gk20a_ecc_stat tex_total_ded_pipe0_count; - struct gk20a_ecc_stat tex_unique_sec_pipe0_count; - struct gk20a_ecc_stat tex_unique_ded_pipe0_count; - struct gk20a_ecc_stat tex_total_sec_pipe1_count; - struct gk20a_ecc_stat tex_total_ded_pipe1_count; - struct gk20a_ecc_stat tex_unique_sec_pipe1_count; - struct gk20a_ecc_stat tex_unique_ded_pipe1_count; - - struct gk20a_ecc_stat sm_l1_tag_corrected_err_count; - struct gk20a_ecc_stat sm_l1_tag_uncorrected_err_count; - struct gk20a_ecc_stat sm_cbu_corrected_err_count; - struct gk20a_ecc_stat sm_cbu_uncorrected_err_count; - struct gk20a_ecc_stat sm_l1_data_corrected_err_count; - struct gk20a_ecc_stat sm_l1_data_uncorrected_err_count; - struct gk20a_ecc_stat sm_icache_corrected_err_count; - struct gk20a_ecc_stat sm_icache_uncorrected_err_count; - struct gk20a_ecc_stat gcc_l15_corrected_err_count; - struct gk20a_ecc_stat gcc_l15_uncorrected_err_count; - struct gk20a_ecc_stat fecs_corrected_err_count; - struct gk20a_ecc_stat fecs_uncorrected_err_count; - struct gk20a_ecc_stat gpccs_corrected_err_count; - struct gk20a_ecc_stat gpccs_uncorrected_err_count; - struct gk20a_ecc_stat mmu_l1tlb_corrected_err_count; - struct gk20a_ecc_stat mmu_l1tlb_uncorrected_err_count; - } gr; - - struct { - struct gk20a_ecc_stat l2_sec_count; - struct gk20a_ecc_stat l2_ded_count; - struct gk20a_ecc_stat l2_cache_corrected_err_count; - struct gk20a_ecc_stat l2_cache_uncorrected_err_count; 
- } ltc; - - struct { - struct gk20a_ecc_stat mmu_l2tlb_corrected_err_count; - struct gk20a_ecc_stat mmu_l2tlb_uncorrected_err_count; - struct gk20a_ecc_stat mmu_hubtlb_corrected_err_count; - struct gk20a_ecc_stat mmu_hubtlb_uncorrected_err_count; - struct gk20a_ecc_stat mmu_fillunit_corrected_err_count; - struct gk20a_ecc_stat mmu_fillunit_uncorrected_err_count; - } fb; - - struct { - struct gk20a_ecc_stat pmu_corrected_err_count; - struct gk20a_ecc_stat pmu_uncorrected_err_count; - } pmu; - - struct { - struct gk20a_ecc_stat fbpa_sec_err_count; - struct gk20a_ecc_stat fbpa_ded_err_count; - } fbpa; - -}; - -#endif /*__ECC_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 7cb8462f..e69036d7 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -35,7 +35,6 @@ struct gk20a_ctxsw_trace; struct acr_desc; struct nvgpu_mem_alloc_tracker; struct dbg_profiler_object_data; -struct ecc_gk20a; struct gk20a_debug_output; struct nvgpu_clk_pll_debug_data; struct nvgpu_nvhost_dev; @@ -64,6 +63,7 @@ struct nvgpu_ctxsw_trace_filter; #include #include #include +#include #include "clk_gk20a.h" #include "ce2_gk20a.h" @@ -77,7 +77,6 @@ struct nvgpu_ctxsw_trace_filter; #include "perf/perf.h" #include "pmgr/pmgr.h" #include "therm/thrm.h" -#include "ecc_gk20a.h" /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. 32 ns is the resolution of ptimer. 
*/ @@ -384,8 +383,7 @@ struct gpu_ops { u32 gpc_exception); void (*enable_gpc_exceptions)(struct gk20a *g); void (*enable_exceptions)(struct gk20a *g); - void (*create_gr_sysfs)(struct gk20a *g); - void (*remove_gr_sysfs)(struct gk20a *g); + int (*init_ecc)(struct gk20a *g); u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g); int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, struct channel_gk20a *fault_ch); @@ -1385,7 +1383,7 @@ struct gk20a { struct mm_gk20a mm; struct nvgpu_pmu pmu; struct acr_desc acr; - struct ecc_gk20a ecc; + struct nvgpu_ecc ecc; struct clk_pmupstate clk_pmu; struct perf_pmupstate perf_pmu; struct pmgr_pmupstate pmgr_pmu; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index c70c1cd4..38570041 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "gk20a.h" #include "gr_gk20a.h" @@ -3127,6 +3128,8 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; gk20a_comptag_allocator_destroy(g, &gr->comp_tags); + + nvgpu_ecc_remove_support(g); } static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) @@ -4872,8 +4875,9 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) gr->remove_support = gk20a_remove_gr_support; gr->sw_ready = true; - if (g->ops.gr.create_gr_sysfs) - g->ops.gr.create_gr_sysfs(g); + err = nvgpu_ecc_init_support(g); + if (err) + goto clean_up; nvgpu_log_fn(g, "done"); return 0; diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 9490ec10..eb150ce8 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -382,10 +382,6 @@ static const struct gpu_ops gp106_ops = { .update_boosted_ctx = NULL, .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, -#ifdef CONFIG_SYSFS - 
.create_gr_sysfs = NULL, - .remove_gr_sysfs = NULL, -#endif .set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode, .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .fecs_host_int_enable = gr_gk20a_fecs_host_int_enable, diff --git a/drivers/gpu/nvgpu/gp10b/ecc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ecc_gp10b.c new file mode 100644 index 00000000..cf95c0d7 --- /dev/null +++ b/drivers/gpu/nvgpu/gp10b/ecc_gp10b.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include + +#include "gk20a/gk20a.h" +#include "gp10b/ecc_gp10b.h" + +int gp10b_ecc_init(struct gk20a *g) +{ + int err = 0; + + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count); + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sec_count); + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sed_count); + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_ded_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe0_count); + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe0_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe0_count); + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe0_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe1_count); + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe1_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe1_count); + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe1_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count); + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count); + if (err != 0) { + goto done; + } + +done: + if (err != 0) { + nvgpu_err(g, "ecc counter allocate failed, err=%d", err); + nvgpu_ecc_free(g); + } + + return err; +} diff --git a/drivers/gpu/nvgpu/gp10b/ecc_gp10b.h b/drivers/gpu/nvgpu/gp10b/ecc_gp10b.h new file mode 100644 index 00000000..e5101db0 --- /dev/null +++ b/drivers/gpu/nvgpu/gp10b/ecc_gp10b.h @@ 
-0,0 +1,28 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __ECC_GP10B_H__ +#define __ECC_GP10B_H__ + +int gp10b_ecc_init(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 16eddeca..17c4e8b7 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -176,7 +176,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g, lrf_ecc_ded_status, &lrf_single_count_delta, lrf_double_count_delta); - g->ecc.gr.sm_lrf_single_err_count.counters[tpc] += + g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter += lrf_single_count_delta; } if (lrf_ecc_ded_status) { @@ -188,7 +188,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g, lrf_ecc_ded_status, &lrf_double_count_delta, lrf_single_count_delta); - g->ecc.gr.sm_lrf_double_err_count.counters[tpc] += + g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter += lrf_double_count_delta; } gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, @@ -213,9 +213,9 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); - g->ecc.gr.sm_shm_sec_count.counters[tpc] += + g->ecc.gr.sm_shm_ecc_sec_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val); - g->ecc.gr.sm_shm_sed_count.counters[tpc] += + g->ecc.gr.sm_shm_ecc_sed_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() | gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m()); @@ -235,7 +235,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); - g->ecc.gr.sm_shm_ded_count.counters[tpc] += + g->ecc.gr.sm_shm_ecc_ded_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val); ecc_stats_reg_val &= 
~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m()); gk20a_writel(g, @@ -276,7 +276,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); - g->ecc.gr.tex_total_sec_pipe0_count.counters[tpc] += + g->ecc.gr.tex_ecc_total_sec_pipe0_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(); gk20a_writel(g, @@ -285,7 +285,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); - g->ecc.gr.tex_unique_sec_pipe0_count.counters[tpc] += + g->ecc.gr.tex_unique_ecc_sec_pipe0_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(); gk20a_writel(g, @@ -300,7 +300,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); - g->ecc.gr.tex_total_sec_pipe1_count.counters[tpc] += + g->ecc.gr.tex_ecc_total_sec_pipe1_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(); gk20a_writel(g, @@ -309,7 +309,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); - g->ecc.gr.tex_unique_sec_pipe1_count.counters[tpc] += + g->ecc.gr.tex_unique_ecc_sec_pipe1_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(); gk20a_writel(g, @@ -332,7 +332,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, 
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); - g->ecc.gr.tex_total_ded_pipe0_count.counters[tpc] += + g->ecc.gr.tex_ecc_total_ded_pipe0_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(); gk20a_writel(g, @@ -341,7 +341,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); - g->ecc.gr.tex_unique_ded_pipe0_count.counters[tpc] += + g->ecc.gr.tex_unique_ecc_ded_pipe0_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); gk20a_writel(g, @@ -356,7 +356,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); - g->ecc.gr.tex_total_ded_pipe1_count.counters[tpc] += + g->ecc.gr.tex_ecc_total_ded_pipe1_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(); gk20a_writel(g, @@ -365,7 +365,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); - g->ecc.gr.tex_unique_ded_pipe1_count.counters[tpc] += + g->ecc.gr.tex_unique_ecc_ded_pipe1_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); gk20a_writel(g, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 94adf727..d32f644d 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -56,6 +56,7 @@ #include "gp10b/regops_gp10b.h" #include "gp10b/therm_gp10b.h" #include "gp10b/priv_ring_gp10b.h" +#include "gp10b/ecc_gp10b.h" 
#include "gm20b/ltc_gm20b.h" #include "gm20b/gr_gm20b.h" @@ -339,11 +340,8 @@ static const struct gpu_ops gp10b_ops = { .init_preemption_state = gr_gp10b_init_preemption_state, .update_boosted_ctx = gr_gp10b_update_boosted_ctx, .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, -#ifdef CONFIG_SYSFS - .create_gr_sysfs = gr_gp10b_create_sysfs, - .remove_gr_sysfs = gr_gp10b_remove_sysfs, -#endif .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode, + .init_ecc = gp10b_ecc_init, .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, .init_gfxp_wfi_timeout_count = gr_gp10b_init_gfxp_wfi_timeout_count, diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c index 1e5807d5..aeeda4a8 100644 --- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c @@ -249,7 +249,7 @@ void gp10b_ltc_isr(struct gk20a *g) ecc_stats_reg_val = gk20a_readl(g, ltc_ltc0_lts0_dstg_ecc_report_r() + offset); - g->ecc.ltc.l2_sec_count.counters[ltc*g->ltc_count + slice] += + g->ecc.ltc.ecc_sec_count[ltc][slice].counter += ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m()); @@ -268,7 +268,7 @@ void gp10b_ltc_isr(struct gk20a *g) ecc_stats_reg_val = gk20a_readl(g, ltc_ltc0_lts0_dstg_ecc_report_r() + offset); - g->ecc.ltc.l2_ded_count.counters[ltc*g->ltc_count + slice] += + g->ecc.ltc.ecc_ded_count[ltc][slice].counter += ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m()); diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index fdbbef36..6134dedc 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -415,10 +415,6 @@ static const struct gpu_ops gv100_ops = { .update_boosted_ctx = gr_gp10b_update_boosted_ctx, .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, .set_bes_crop_debug4 = 
gr_gp10b_set_bes_crop_debug4, -#ifdef CONFIG_SYSFS - .create_gr_sysfs = gr_gv11b_create_sysfs, - .remove_gr_sysfs = gr_gv11b_remove_sysfs, -#endif .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode, .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table, diff --git a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.c b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.c new file mode 100644 index 00000000..6e29bf94 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include
+
+#include "gk20a/gk20a.h"
+#include "gv11b/ecc_gv11b.h"
+
+/*
+ * Each helper below initializes one group of counters in order and returns
+ * the first non-zero error code, or 0 when the whole group succeeded.
+ * The NVGPU_ECC_COUNTER_INIT_* macros pick up the local variable "g".
+ */
+
+/* SM counters, kept per TPC: LRF, L1 tag, CBU, L1 data and icache. */
+static int gv11b_ecc_init_sm_counters(struct gk20a *g)
+{
+	int ret;
+
+	ret = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+	ret = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = NVGPU_ECC_COUNTER_INIT_PER_TPC(
+			sm_l1_tag_ecc_corrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+	ret = NVGPU_ECC_COUNTER_INIT_PER_TPC(
+			sm_l1_tag_ecc_uncorrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = NVGPU_ECC_COUNTER_INIT_PER_TPC(
+			sm_cbu_ecc_corrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+	ret = NVGPU_ECC_COUNTER_INIT_PER_TPC(
+			sm_cbu_ecc_uncorrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = NVGPU_ECC_COUNTER_INIT_PER_TPC(
+			sm_l1_data_ecc_corrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+	ret = NVGPU_ECC_COUNTER_INIT_PER_TPC(
+			sm_l1_data_ecc_uncorrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = NVGPU_ECC_COUNTER_INIT_PER_TPC(
+			sm_icache_ecc_corrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	return NVGPU_ECC_COUNTER_INIT_PER_TPC(
+			sm_icache_ecc_uncorrected_err_count);
+}
+
+/* GCC L1.5 counters (per GPC) followed by the LTC counters (per LTS). */
+static int gv11b_ecc_init_cache_counters(struct gk20a *g)
+{
+	int ret;
+
+	ret = NVGPU_ECC_COUNTER_INIT_PER_GPC(
+			gcc_l15_ecc_corrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+	ret = NVGPU_ECC_COUNTER_INIT_PER_GPC(
+			gcc_l15_ecc_uncorrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	return NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count);
+}
+
+/* FECS counters (single instance), then GPCCS and MMU L1TLB (per GPC). */
+static int gv11b_ecc_init_falcon_mmu_counters(struct gk20a *g)
+{
+	int ret;
+
+	ret = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_uncorrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+	ret = NVGPU_ECC_COUNTER_INIT_GR(fecs_ecc_corrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = NVGPU_ECC_COUNTER_INIT_PER_GPC(
+			gpccs_ecc_uncorrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+	ret = NVGPU_ECC_COUNTER_INIT_PER_GPC(
+			gpccs_ecc_corrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = NVGPU_ECC_COUNTER_INIT_PER_GPC(
+			mmu_l1tlb_ecc_uncorrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	return NVGPU_ECC_COUNTER_INIT_PER_GPC(
+			mmu_l1tlb_ecc_corrected_err_count);
+}
+
+/* FB counters: MMU L2TLB, HUBTLB and fill unit. */
+static int gv11b_ecc_init_fb_counters(struct gk20a *g)
+{
+	int ret;
+
+	ret = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_uncorrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+	ret = NVGPU_ECC_COUNTER_INIT_FB(mmu_l2tlb_ecc_corrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_uncorrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+	ret = NVGPU_ECC_COUNTER_INIT_FB(mmu_hubtlb_ecc_corrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = NVGPU_ECC_COUNTER_INIT_FB(
+			mmu_fillunit_ecc_uncorrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	return NVGPU_ECC_COUNTER_INIT_FB(
+			mmu_fillunit_ecc_corrected_err_count);
+}
+
+/* PMU counters. */
+static int gv11b_ecc_init_pmu_counters(struct gk20a *g)
+{
+	int ret;
+
+	ret = NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_uncorrected_err_count);
+	if (ret != 0) {
+		return ret;
+	}
+
+	return NVGPU_ECC_COUNTER_INIT_PMU(pmu_ecc_corrected_err_count);
+}
+
+/*
+ * Initialize every ECC counter used on gv11b.
+ *
+ * The groups are processed in a fixed order and processing stops at the
+ * first failure. On failure the error is logged and nvgpu_ecc_free() is
+ * called before returning.
+ *
+ * Returns 0 on success, otherwise the error code of the failing init call.
+ */
+int gv11b_ecc_init(struct gk20a *g)
+{
+	int err = gv11b_ecc_init_sm_counters(g);
+
+	if (err == 0) {
+		err = gv11b_ecc_init_cache_counters(g);
+	}
+	if (err == 0) {
+		err = gv11b_ecc_init_falcon_mmu_counters(g);
+	}
+	if (err == 0) {
+		err = gv11b_ecc_init_fb_counters(g);
+	}
+	if (err == 0) {
+		err = gv11b_ecc_init_pmu_counters(g);
+	}
+
+	if (err != 0) {
+		nvgpu_err(g, "ecc counter allocate failed, err=%d", err);
+		nvgpu_ecc_free(g);
+	}
+
+	return err;
+}
diff --git a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
new file mode 100644
index 00000000..ce0f12b9
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __ECC_GV11B_H__ +#define __ECC_GV11B_H__ + +int gv11b_ecc_init(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index d3fe5f65..c2f47a20 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -198,7 +198,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, l1_tag_corrected_err_count_delta += (is_l1_tag_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s()); - g->ecc.gr.sm_l1_tag_corrected_err_count.counters[tpc] += + g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter += l1_tag_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, @@ -213,7 +213,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, l1_tag_uncorrected_err_count_delta += (is_l1_tag_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s()); - g->ecc.gr.sm_l1_tag_uncorrected_err_count.counters[tpc] += + g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter += l1_tag_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, @@ -290,7 +290,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, lrf_corrected_err_count_delta += (is_lrf_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s()); - g->ecc.gr.sm_lrf_single_err_count.counters[tpc] += + g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter += lrf_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset, @@ -305,7 +305,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, lrf_uncorrected_err_count_delta += (is_lrf_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s()); - 
g->ecc.gr.sm_lrf_double_err_count.counters[tpc] += + g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter += lrf_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, @@ -449,7 +449,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, cbu_corrected_err_count_delta += (is_cbu_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s()); - g->ecc.gr.sm_cbu_corrected_err_count.counters[tpc] += + g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter += cbu_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset, @@ -464,7 +464,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, cbu_uncorrected_err_count_delta += (is_cbu_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s()); - g->ecc.gr.sm_cbu_uncorrected_err_count.counters[tpc] += + g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter += cbu_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, @@ -529,7 +529,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, l1_data_corrected_err_count_delta += (is_l1_data_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s()); - g->ecc.gr.sm_l1_data_corrected_err_count.counters[tpc] += + g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter += l1_data_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset, @@ -544,7 +544,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, l1_data_uncorrected_err_count_delta += (is_l1_data_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s()); - g->ecc.gr.sm_l1_data_uncorrected_err_count.counters[tpc] += + 
g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter += l1_data_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, @@ -613,7 +613,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, icache_corrected_err_count_delta += (is_icache_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s()); - g->ecc.gr.sm_icache_corrected_err_count.counters[tpc] += + g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter += icache_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset, @@ -628,7 +628,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, icache_uncorrected_err_count_delta += (is_icache_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s()); - g->ecc.gr.sm_icache_uncorrected_err_count.counters[tpc] += + g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter += icache_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset, @@ -717,7 +717,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, gcc_l15_corrected_err_count_delta += (is_gcc_l15_ecc_corrected_total_err_overflow << gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s()); - g->ecc.gr.gcc_l15_corrected_err_count.counters[gpc] += + g->ecc.gr.gcc_l15_ecc_corrected_err_count[gpc].counter += gcc_l15_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset, @@ -732,7 +732,7 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, gcc_l15_uncorrected_err_count_delta += (is_gcc_l15_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s()); - g->ecc.gr.gcc_l15_uncorrected_err_count.counters[gpc] += + g->ecc.gr.gcc_l15_ecc_uncorrected_err_count[gpc].counter += 
gcc_l15_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset, @@ -802,9 +802,9 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc, uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s()); - g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc] += + g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter += corrected_delta; - g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc] += + g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter += uncorrected_delta; nvgpu_log(g, gpu_dbg_intr, "mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); @@ -824,8 +824,8 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc, "ecc error address: 0x%x", ecc_addr); nvgpu_log(g, gpu_dbg_intr, "ecc error count corrected: %d, uncorrected %d", - g->ecc.gr.mmu_l1tlb_corrected_err_count.counters[gpc], - g->ecc.gr.mmu_l1tlb_uncorrected_err_count.counters[gpc]); + g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter, + g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter); return ret; } @@ -880,9 +880,9 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset, gr_gpc0_gpccs_falcon_ecc_status_reset_task_f()); - g->ecc.gr.gpccs_corrected_err_count.counters[gpc] += + g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter += corrected_delta; - g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc] += + g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter += uncorrected_delta; nvgpu_log(g, gpu_dbg_intr, "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); @@ -907,8 +907,8 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, nvgpu_log(g, gpu_dbg_intr, "ecc error count corrected: %d, uncorrected %d", - g->ecc.gr.gpccs_corrected_err_count.counters[gpc], - g->ecc.gr.gpccs_uncorrected_err_count.counters[gpc]); + 
g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter, + g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter); return ret; } @@ -2419,9 +2419,9 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) gk20a_writel(g, gr_fecs_falcon_ecc_status_r(), gr_fecs_falcon_ecc_status_reset_task_f()); - g->ecc.gr.fecs_corrected_err_count.counters[0] += + g->ecc.gr.fecs_ecc_corrected_err_count[0].counter += corrected_delta; - g->ecc.gr.fecs_uncorrected_err_count.counters[0] += + g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter += uncorrected_delta; nvgpu_log(g, gpu_dbg_intr, @@ -2450,8 +2450,8 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) nvgpu_log(g, gpu_dbg_intr, "ecc error count corrected: %d, uncorrected %d", - g->ecc.gr.fecs_corrected_err_count.counters[0], - g->ecc.gr.fecs_uncorrected_err_count.counters[0]); + g->ecc.gr.fecs_ecc_corrected_err_count[0].counter, + g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter); } } diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 366d6928..efac772c 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -84,6 +84,7 @@ #include "regops_gv11b.h" #include "subctx_gv11b.h" #include "therm_gv11b.h" +#include "ecc_gv11b.h" #include #include @@ -369,10 +370,7 @@ static const struct gpu_ops gv11b_ops = { .update_boosted_ctx = gr_gp10b_update_boosted_ctx, .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, -#ifdef CONFIG_SYSFS - .create_gr_sysfs = gr_gv11b_create_sysfs, - .remove_gr_sysfs = gr_gv11b_remove_sysfs, -#endif + .init_ecc = gv11b_ecc_init, .set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode, .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table, diff --git a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c index 48faa4d2..db797bde 100644 --- 
a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c @@ -90,13 +90,11 @@ void gv11b_ltc_isr(struct gk20a *g) u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; u32 corrected_delta, uncorrected_delta; u32 corrected_overflow, uncorrected_overflow; - u32 ltc_corrected, ltc_uncorrected; mc_intr = gk20a_readl(g, mc_intr_ltc_r()); for (ltc = 0; ltc < g->ltc_count; ltc++) { if ((mc_intr & 1U << ltc) == 0) continue; - ltc_corrected = ltc_uncorrected = 0U; for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { u32 offset = ltc_stride * ltc + lts_stride * slice; @@ -150,8 +148,8 @@ void gv11b_ltc_isr(struct gk20a *g) if (uncorrected_overflow) uncorrected_delta += (0x1U << ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_total_s()); - ltc_corrected += corrected_delta; - ltc_uncorrected += uncorrected_delta; + g->ecc.ltc.ecc_sec_count[ltc][slice].counter += corrected_delta; + g->ecc.ltc.ecc_ded_count[ltc][slice].counter += uncorrected_delta; nvgpu_log(g, gpu_dbg_intr, "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3); @@ -177,10 +175,6 @@ void gv11b_ltc_isr(struct gk20a *g) } } - g->ecc.ltc.l2_cache_corrected_err_count.counters[ltc] += - ltc_corrected; - g->ecc.ltc.l2_cache_uncorrected_err_count.counters[ltc] += - ltc_uncorrected; } diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c index 3f0e2f22..9a2e9c00 100644 --- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c @@ -343,8 +343,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0) if (uncorrected_overflow) uncorrected_delta += (0x1UL << pwr_pmu_falcon_ecc_uncorrected_err_count_total_s()); - g->ecc.pmu.pmu_corrected_err_count.counters[0] += corrected_delta; - g->ecc.pmu.pmu_uncorrected_err_count.counters[0] += uncorrected_delta; + g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter += corrected_delta; + g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter += uncorrected_delta; nvgpu_log(g, 
gpu_dbg_intr, "pmu ecc interrupt intr1: 0x%x", intr1); @@ -371,8 +371,8 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0) nvgpu_log(g, gpu_dbg_intr, "ecc error count corrected: %d, uncorrected %d", - g->ecc.pmu.pmu_corrected_err_count.counters[0], - g->ecc.pmu.pmu_uncorrected_err_count.counters[0]); + g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter, + g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter); } } } diff --git a/drivers/gpu/nvgpu/include/nvgpu/ecc.h b/drivers/gpu/nvgpu/include/nvgpu/ecc.h new file mode 100644 index 00000000..9b211ef7 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/ecc.h @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifndef NVGPU_ECC_H
+#define NVGPU_ECC_H
+
+#include
+#include
+
+/* Size in bytes of the name buffer embedded in struct nvgpu_ecc_stat. */
+#define NVGPU_ECC_STAT_NAME_MAX_SIZE	100
+
+struct gk20a;
+
+/*
+ * One ECC error counter.
+ *
+ * name:    counter name; nvgpu_ecc_sysfs_init() uses it as the sysfs
+ *          attribute name.
+ * counter: running error count; the interrupt handlers add their deltas
+ *          to it and the sysfs attribute reads it.
+ * node:    link used to chain the counter on nvgpu_ecc.stats_list.
+ */
+struct nvgpu_ecc_stat {
+	char name[NVGPU_ECC_STAT_NAME_MAX_SIZE];
+	u32 counter;
+	struct nvgpu_list_node node;
+};
+
+/*
+ * Map a pointer to the embedded "node" member back to the nvgpu_ecc_stat
+ * that contains it, by subtracting the member's offset within the struct.
+ */
+static inline struct nvgpu_ecc_stat *nvgpu_ecc_stat_from_node(
+		struct nvgpu_list_node *node)
+{
+	return (struct nvgpu_ecc_stat *)(
+			(uintptr_t)node - offsetof(struct nvgpu_ecc_stat, node)
+		);
+}
+
+/*
+ * All ECC counters of one GPU, grouped by hardware unit.
+ *
+ * The pointer depth matches how the interrupt handlers index the members:
+ * "**" members are used as [gpc][tpc] (gr) or [ltc][slice] (ltc), "*"
+ * members in gr are used as [gpc], and per-device members as [0].
+ * NOTE(review): the fb and fbpa users are not visible in this part of the
+ * patch; their indexing is presumably [0] and [fbpa] -- confirm.
+ */
+struct nvgpu_ecc {
+	struct {
+		/* stats per tpc */
+
+		struct nvgpu_ecc_stat **sm_lrf_ecc_single_err_count;
+		struct nvgpu_ecc_stat **sm_lrf_ecc_double_err_count;
+
+		struct nvgpu_ecc_stat **sm_shm_ecc_sec_count;
+		struct nvgpu_ecc_stat **sm_shm_ecc_sed_count;
+		struct nvgpu_ecc_stat **sm_shm_ecc_ded_count;
+
+		struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe0_count;
+		struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe0_count;
+		struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe0_count;
+		struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe0_count;
+		struct nvgpu_ecc_stat **tex_ecc_total_sec_pipe1_count;
+		struct nvgpu_ecc_stat **tex_ecc_total_ded_pipe1_count;
+		struct nvgpu_ecc_stat **tex_unique_ecc_sec_pipe1_count;
+		struct nvgpu_ecc_stat **tex_unique_ecc_ded_pipe1_count;
+
+		struct nvgpu_ecc_stat **sm_l1_tag_ecc_corrected_err_count;
+		struct nvgpu_ecc_stat **sm_l1_tag_ecc_uncorrected_err_count;
+		struct nvgpu_ecc_stat **sm_cbu_ecc_corrected_err_count;
+		struct nvgpu_ecc_stat **sm_cbu_ecc_uncorrected_err_count;
+		struct nvgpu_ecc_stat **sm_l1_data_ecc_corrected_err_count;
+		struct nvgpu_ecc_stat **sm_l1_data_ecc_uncorrected_err_count;
+		struct nvgpu_ecc_stat **sm_icache_ecc_corrected_err_count;
+		struct nvgpu_ecc_stat **sm_icache_ecc_uncorrected_err_count;
+
+		/* stats per gpc */
+
+		struct nvgpu_ecc_stat *gcc_l15_ecc_corrected_err_count;
+		struct nvgpu_ecc_stat *gcc_l15_ecc_uncorrected_err_count;
+
+		struct nvgpu_ecc_stat *gpccs_ecc_corrected_err_count;
+		struct nvgpu_ecc_stat *gpccs_ecc_uncorrected_err_count;
+		struct nvgpu_ecc_stat *mmu_l1tlb_ecc_corrected_err_count;
+		struct nvgpu_ecc_stat *mmu_l1tlb_ecc_uncorrected_err_count;
+
+		/* stats per device */
+		struct nvgpu_ecc_stat *fecs_ecc_corrected_err_count;
+		struct nvgpu_ecc_stat *fecs_ecc_uncorrected_err_count;
+	} gr;
+
+	struct {
+		/* stats per lts */
+		struct nvgpu_ecc_stat **ecc_sec_count;
+		struct nvgpu_ecc_stat **ecc_ded_count;
+	} ltc;
+
+	struct {
+		/* stats per device */
+		struct nvgpu_ecc_stat *mmu_l2tlb_ecc_corrected_err_count;
+		struct nvgpu_ecc_stat *mmu_l2tlb_ecc_uncorrected_err_count;
+		struct nvgpu_ecc_stat *mmu_hubtlb_ecc_corrected_err_count;
+		struct nvgpu_ecc_stat *mmu_hubtlb_ecc_uncorrected_err_count;
+		struct nvgpu_ecc_stat *mmu_fillunit_ecc_corrected_err_count;
+		struct nvgpu_ecc_stat *mmu_fillunit_ecc_uncorrected_err_count;
+	} fb;
+
+	struct {
+		/* stats per device */
+		struct nvgpu_ecc_stat *pmu_ecc_corrected_err_count;
+		struct nvgpu_ecc_stat *pmu_ecc_uncorrected_err_count;
+	} pmu;
+
+	struct {
+		/* stats per fbpa */
+		struct nvgpu_ecc_stat *fbpa_ecc_sec_err_count;
+		struct nvgpu_ecc_stat *fbpa_ecc_ded_err_count;
+	} fbpa;
+
+	/* List of nvgpu_ecc_stat entries, linked through their "node" member. */
+	struct nvgpu_list_node stats_list;
+	/*
+	 * nvgpu_ecc_sysfs_init() sizes its attribute array from this value and
+	 * fails if stats_list holds more entries than it says. Presumably the
+	 * number of entries on stats_list -- confirm against the code that
+	 * fills the list.
+	 */
+	int stats_count;
+};
+
+/*
+ * Counter init functions and their convenience macros.
+ *
+ * Every NVGPU_ECC_COUNTER_INIT_* macro takes the bare member name, expands
+ * to a call on the matching member of g->ecc.<unit> and passes the member
+ * name as the string argument. The expansion references "g", so a
+ * struct gk20a pointer with that name must be in scope at the call site.
+ * NOTE(review): the function bodies are not visible here (presumably in
+ * common/ecc.c); the chip init code treats a non-zero return as failure.
+ */
+
+int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
+		struct nvgpu_ecc_stat ***stat, const char *name);
+#define NVGPU_ECC_COUNTER_INIT_PER_TPC(stat) \
+	nvgpu_ecc_counter_init_per_tpc(g, &g->ecc.gr.stat, #stat)
+
+int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
+		struct nvgpu_ecc_stat **stat, const char *name);
+#define NVGPU_ECC_COUNTER_INIT_PER_GPC(stat) \
+	nvgpu_ecc_counter_init_per_gpc(g, &g->ecc.gr.stat, #stat)
+
+int nvgpu_ecc_counter_init(struct gk20a *g,
+		struct nvgpu_ecc_stat **stat, const char *name);
+#define NVGPU_ECC_COUNTER_INIT_GR(stat) \
+	nvgpu_ecc_counter_init(g, &g->ecc.gr.stat, #stat)
+#define NVGPU_ECC_COUNTER_INIT_FB(stat) \
+	nvgpu_ecc_counter_init(g, &g->ecc.fb.stat, #stat)
+#define NVGPU_ECC_COUNTER_INIT_PMU(stat) \
+	nvgpu_ecc_counter_init(g, &g->ecc.pmu.stat, #stat)
+
+int nvgpu_ecc_counter_init_per_lts(struct gk20a *g,
+		struct nvgpu_ecc_stat ***stat, const char *name);
+#define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \
+	nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat)
+
+int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g,
+		struct nvgpu_ecc_stat **stat, const char *name);
+#define NVGPU_ECC_COUNTER_INIT_PER_FBPA(stat) \
+	nvgpu_ecc_counter_init_per_fbpa(g, &g->ecc.fbpa.stat, #stat)
+
+/* Called by the chip init code when a counter init call fails. */
+void nvgpu_ecc_free(struct gk20a *g);
+
+int nvgpu_ecc_init_support(struct gk20a *g);
+void nvgpu_ecc_remove_support(struct gk20a *g);
+
+/* OSes to implement */
+
+int nvgpu_ecc_sysfs_init(struct gk20a *g);
+void nvgpu_ecc_sysfs_remove(struct gk20a *g);
+
+#endif
diff --git a/drivers/gpu/nvgpu/os/linux/ecc_sysfs.c b/drivers/gpu/nvgpu/os/linux/ecc_sysfs.c
new file mode 100644
index 00000000..0962e247
--- /dev/null
+++ b/drivers/gpu/nvgpu/os/linux/ecc_sysfs.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include
+
+#include "gk20a/gk20a.h"
+#include "os_linux.h"
+
+/*
+ * Publish every ECC counter on g->ecc.stats_list as a read-only sysfs file
+ * on the GPU device.
+ *
+ * One dev_ext_attribute is set up per counter: the file is named after
+ * stat->name and shows stat->counter. On success the attribute array is
+ * stored in the Linux-specific state (ecc_attrs) so that
+ * nvgpu_ecc_sysfs_remove() can tear the files down again.
+ *
+ * Returns 0 on success, -ENOMEM if the attribute array cannot be allocated,
+ * -EINVAL if stats_list holds more entries than stats_count, or the error
+ * returned by device_create_file(). On failure every file created so far
+ * is removed and the array is freed.
+ */
+int nvgpu_ecc_sysfs_init(struct gk20a *g)
+{
+	struct device *dev = dev_from_gk20a(g);
+	struct nvgpu_ecc *ecc = &g->ecc;
+	struct dev_ext_attribute *attr;
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct nvgpu_ecc_stat *stat;
+	/*
+	 * err must start at 0: with an empty stats_list the loop body never
+	 * runs and the check after the loop would otherwise read an
+	 * uninitialized value.
+	 */
+	int i = 0, err = 0;
+
+	attr = nvgpu_kzalloc(g, sizeof(*attr) * ecc->stats_count);
+	if (!attr)
+		return -ENOMEM;
+
+	nvgpu_list_for_each_entry(stat,
+			&ecc->stats_list, nvgpu_ecc_stat, node) {
+		/* attr[] only has room for stats_count entries. */
+		if (i >= ecc->stats_count) {
+			err = -EINVAL;
+			nvgpu_err(g, "stats_list longer than stats_count %d",
+				ecc->stats_count);
+			break;
+		}
+		sysfs_attr_init(&attr[i].attr);
+		attr[i].attr.attr.name = stat->name;
+		attr[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
+		attr[i].var = &stat->counter;
+		/*
+		 * NOTE(review): device_show_int formats a signed int while
+		 * counter is a u32, so counts above INT_MAX would presumably
+		 * read back negative -- confirm whether that matters here.
+		 */
+		attr[i].attr.show = device_show_int;
+		err = device_create_file(dev, &attr[i].attr);
+		if (err) {
+			nvgpu_err(g, "sysfs node create failed for %s\n",
+				stat->name);
+			break;
+		}
+		i++;
+	}
+
+	if (err) {
+		/* Unwind: remove the files created so far, newest first. */
+		while (i-- > 0)
+			device_remove_file(dev, &attr[i].attr);
+		nvgpu_kfree(g, attr);
+		return err;
+	}
+
+	l->ecc_attrs = attr;
+
+	return 0;
+}
+
+/*
+ * Remove the sysfs files created by nvgpu_ecc_sysfs_init() and free the
+ * attribute array.
+ *
+ * Does nothing when no attribute array is recorded, which makes a second
+ * call harmless (ecc_attrs is reset to NULL below).
+ */
+void nvgpu_ecc_sysfs_remove(struct gk20a *g)
+{
+	struct device *dev = dev_from_gk20a(g);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct nvgpu_ecc *ecc = &g->ecc;
+	int i;
+
+	/*
+	 * Without this check the loop below would index a NULL array.
+	 * Presumably ecc_attrs is also NULL when init failed or never ran
+	 * (assumes the containing struct is zero-initialized -- confirm).
+	 */
+	if (l->ecc_attrs == NULL)
+		return;
+
+	for (i = 0; i < ecc->stats_count; i++)
+		device_remove_file(dev, &l->ecc_attrs[i].attr);
+	nvgpu_kfree(g, l->ecc_attrs);
+	l->ecc_attrs = NULL;
+}
diff --git a/drivers/gpu/nvgpu/os/linux/os_linux.h b/drivers/gpu/nvgpu/os/linux/os_linux.h
index 4dcce322..85d697bd 100644
--- a/drivers/gpu/nvgpu/os/linux/os_linux.h
+++ b/drivers/gpu/nvgpu/os/linux/os_linux.h
@@ -141,6 +141,7 @@ struct nvgpu_os_linux {
 	struct dentry *debugfs_dump_ctxsw_stats;
 #endif
 	DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5);
+	struct dev_ext_attribute *ecc_attrs;
 
 	struct gk20a_cde_app cde_app;
 
diff --git a/drivers/gpu/nvgpu/os/linux/pci.c
b/drivers/gpu/nvgpu/os/linux/pci.c index 3493b105..41fb69a0 100644 --- a/drivers/gpu/nvgpu/os/linux/pci.c +++ b/drivers/gpu/nvgpu/os/linux/pci.c @@ -52,11 +52,6 @@ static int nvgpu_pci_tegra_probe(struct device *dev) static int nvgpu_pci_tegra_remove(struct device *dev) { - struct gk20a *g = get_gk20a(dev); - - if (g->ops.gr.remove_gr_sysfs) - g->ops.gr.remove_gr_sysfs(g); - return 0; } diff --git a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c deleted file mode 100644 index 2a6ace37..00000000 --- a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include - -#include -#include -#include - -#include "os_linux.h" - -#include "gk20a/gk20a.h" - -#include "platform_gk20a.h" -#include "platform_gk20a_tegra.h" -#include "platform_gp10b.h" -#include "platform_gp10b_tegra.h" -#include "platform_ecc_sysfs.h" - -static u32 gen_ecc_hash_key(char *str) -{ - int i = 0; - u32 hash_key = 0x811c9dc5; - - while (str[i]) { - hash_key *= 0x1000193; - hash_key ^= (u32)(str[i]); - i++; - }; - - return hash_key; -} - -static ssize_t ecc_stat_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - const char *ecc_stat_full_name = attr->attr.name; - const char *ecc_stat_base_name; - unsigned int hw_unit; - unsigned int subunit; - struct gk20a_ecc_stat *ecc_stat; - u32 hash_key; - struct gk20a *g = get_gk20a(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - - if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit, - &subunit) == 2) { - ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]); - hw_unit = g->gr.slices_per_ltc * hw_unit + subunit; - } else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) { - ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]); - } else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) { - ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]); - } else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) { - ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]); - } else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) { - ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]); - } else { - return snprintf(buf, - PAGE_SIZE, - "Error: Invalid ECC stat name!\n"); - } - - hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name); - - hash_for_each_possible(l->ecc_sysfs_stats_htable, - ecc_stat, - hash_node, - hash_key) { - if (hw_unit >= ecc_stat->count) - continue; - if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit])) - return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stat->counters[hw_unit]); - 
} - - return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n"); -} - -int nvgpu_gr_ecc_stat_create(struct device *dev, - int is_l2, char *ecc_stat_name, - struct gk20a_ecc_stat *ecc_stat) -{ - struct gk20a *g = get_gk20a(dev); - char *ltc_unit_name = "ltc"; - char *gr_unit_name = "gpc0_tpc"; - char *lts_unit_name = "lts"; - int num_hw_units = 0; - int num_subunits = 0; - - if (is_l2 == 1) - num_hw_units = g->ltc_count; - else if (is_l2 == 2) { - num_hw_units = g->ltc_count; - num_subunits = g->gr.slices_per_ltc; - } else - num_hw_units = g->gr.tpc_count; - - - return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits, - is_l2 ? ltc_unit_name : gr_unit_name, - num_subunits ? lts_unit_name: NULL, - ecc_stat_name, - ecc_stat); -} - -int nvgpu_ecc_stat_create(struct device *dev, - int num_hw_units, int num_subunits, - char *ecc_unit_name, char *ecc_subunit_name, - char *ecc_stat_name, - struct gk20a_ecc_stat *ecc_stat) -{ - int error = 0; - struct gk20a *g = get_gk20a(dev); - struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); - int hw_unit = 0; - int subunit = 0; - int element = 0; - u32 hash_key = 0; - struct device_attribute *dev_attr_array; - - int num_elements = num_subunits ? 
num_subunits * num_hw_units : - num_hw_units; - - /* Allocate arrays */ - dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) * - num_elements); - ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements); - ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements); - - for (hw_unit = 0; hw_unit < num_elements; hw_unit++) { - ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) * - ECC_STAT_NAME_MAX_SIZE); - } - ecc_stat->count = num_elements; - if (num_subunits) { - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { - for (subunit = 0; subunit < num_subunits; subunit++) { - element = hw_unit*num_subunits + subunit; - - snprintf(ecc_stat->names[element], - ECC_STAT_NAME_MAX_SIZE, - "%s%d_%s%d_%s", - ecc_unit_name, - hw_unit, - ecc_subunit_name, - subunit, - ecc_stat_name); - - sysfs_attr_init(&dev_attr_array[element].attr); - dev_attr_array[element].attr.name = - ecc_stat->names[element]; - dev_attr_array[element].attr.mode = - VERIFY_OCTAL_PERMISSIONS(S_IRUGO); - dev_attr_array[element].show = ecc_stat_show; - dev_attr_array[element].store = NULL; - - /* Create sysfs file */ - error |= device_create_file(dev, - &dev_attr_array[element]); - - } - } - } else { - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { - - /* Fill in struct device_attribute members */ - snprintf(ecc_stat->names[hw_unit], - ECC_STAT_NAME_MAX_SIZE, - "%s%d_%s", - ecc_unit_name, - hw_unit, - ecc_stat_name); - - sysfs_attr_init(&dev_attr_array[hw_unit].attr); - dev_attr_array[hw_unit].attr.name = - ecc_stat->names[hw_unit]; - dev_attr_array[hw_unit].attr.mode = - VERIFY_OCTAL_PERMISSIONS(S_IRUGO); - dev_attr_array[hw_unit].show = ecc_stat_show; - dev_attr_array[hw_unit].store = NULL; - - /* Create sysfs file */ - error |= device_create_file(dev, - &dev_attr_array[hw_unit]); - } - } - - /* Add hash table entry */ - hash_key = gen_ecc_hash_key(ecc_stat_name); - hash_add(l->ecc_sysfs_stats_htable, - &ecc_stat->hash_node, - hash_key); - - 
ecc_stat->attr_array = dev_attr_array; - - return error; -} - -void nvgpu_gr_ecc_stat_remove(struct device *dev, - int is_l2, struct gk20a_ecc_stat *ecc_stat) -{ - struct gk20a *g = get_gk20a(dev); - int num_hw_units = 0; - int num_subunits = 0; - - if (is_l2 == 1) - num_hw_units = g->ltc_count; - else if (is_l2 == 2) { - num_hw_units = g->ltc_count; - num_subunits = g->gr.slices_per_ltc; - } else - num_hw_units = g->gr.tpc_count; - - nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat); -} - -void nvgpu_ecc_stat_remove(struct device *dev, - int num_hw_units, int num_subunits, - struct gk20a_ecc_stat *ecc_stat) -{ - struct gk20a *g = get_gk20a(dev); - struct device_attribute *dev_attr_array = ecc_stat->attr_array; - int hw_unit = 0; - int subunit = 0; - int element = 0; - int num_elements = num_subunits ? num_subunits * num_hw_units : - num_hw_units; - - /* Remove sysfs files */ - if (num_subunits) { - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { - for (subunit = 0; subunit < num_subunits; subunit++) { - element = hw_unit * num_subunits + subunit; - - device_remove_file(dev, - &dev_attr_array[element]); - } - } - } else { - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) - device_remove_file(dev, &dev_attr_array[hw_unit]); - } - - /* Remove hash table entry */ - hash_del(&ecc_stat->hash_node); - - /* Free arrays */ - nvgpu_kfree(g, ecc_stat->counters); - - for (hw_unit = 0; hw_unit < num_elements; hw_unit++) - nvgpu_kfree(g, ecc_stat->names[hw_unit]); - - nvgpu_kfree(g, ecc_stat->names); - nvgpu_kfree(g, dev_attr_array); -} diff --git a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h b/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h deleted file mode 100644 index d29f7bd3..00000000 --- a/drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef _NVGPU_PLATFORM_SYSFS_H_ -#define _NVGPU_PLATFORM_SYSFS_H_ - -#include "gp10b/gr_gp10b.h" - -#define ECC_STAT_NAME_MAX_SIZE 100 - -int nvgpu_gr_ecc_stat_create(struct device *dev, - int is_l2, char *ecc_stat_name, - struct gk20a_ecc_stat *ecc_stat); -int nvgpu_ecc_stat_create(struct device *dev, - int num_hw_units, int num_subunits, - char *ecc_unit_name, char *ecc_subunit_name, - char *ecc_stat_name, - struct gk20a_ecc_stat *ecc_stat); -void nvgpu_gr_ecc_stat_remove(struct device *dev, - int is_l2, struct gk20a_ecc_stat *ecc_stat); -void nvgpu_ecc_stat_remove(struct device *dev, - int num_hw_units, int num_subunits, - struct gk20a_ecc_stat *ecc_stat); -#endif diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c index d5530368..c5464d5b 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c @@ -41,7 +41,6 @@ #include "gk20a/gk20a.h" #include "platform_gk20a.h" -#include "platform_ecc_sysfs.h" #include "platform_gk20a_tegra.h" #include "platform_gp10b.h" #include "platform_gp10b_tegra.h" @@ -177,11 +176,6 @@ static int gp10b_tegra_late_probe(struct device *dev) static int gp10b_tegra_remove(struct device *dev) { - struct gk20a *g = get_gk20a(dev); - - if (g->ops.gr.remove_gr_sysfs) - g->ops.gr.remove_gr_sysfs(g); - /* deinitialise tegra specific scaling quirks 
*/ gp10b_tegra_scale_exit(dev); @@ -476,162 +470,3 @@ struct gk20a_platform gp10b_tegra_platform = { .secure_buffer_size = 401408, }; - -void gr_gp10b_create_sysfs(struct gk20a *g) -{ - int error = 0; - struct device *dev = dev_from_gk20a(g); - - /* This stat creation function is called on GR init. GR can get - initialized multiple times but we only need to create the ECC - stats once. Therefore, add the following check to avoid - creating duplicate stat sysfs nodes. */ - if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL) - return; - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_lrf_ecc_single_err_count", - &g->ecc.gr.sm_lrf_single_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_lrf_ecc_double_err_count", - &g->ecc.gr.sm_lrf_double_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_shm_ecc_sec_count", - &g->ecc.gr.sm_shm_sec_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_shm_ecc_sed_count", - &g->ecc.gr.sm_shm_sed_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_shm_ecc_ded_count", - &g->ecc.gr.sm_shm_ded_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_total_sec_pipe0_count", - &g->ecc.gr.tex_total_sec_pipe0_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_total_ded_pipe0_count", - &g->ecc.gr.tex_total_ded_pipe0_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_unique_sec_pipe0_count", - &g->ecc.gr.tex_unique_sec_pipe0_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_unique_ded_pipe0_count", - &g->ecc.gr.tex_unique_ded_pipe0_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_total_sec_pipe1_count", - &g->ecc.gr.tex_total_sec_pipe1_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_total_ded_pipe1_count", - &g->ecc.gr.tex_total_ded_pipe1_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_unique_sec_pipe1_count", - &g->ecc.gr.tex_unique_sec_pipe1_count); - - error |= 
nvgpu_gr_ecc_stat_create(dev, - 0, - "tex_ecc_unique_ded_pipe1_count", - &g->ecc.gr.tex_unique_ded_pipe1_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 2, - "ecc_sec_count", - &g->ecc.ltc.l2_sec_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 2, - "ecc_ded_count", - &g->ecc.ltc.l2_ded_count); - - if (error) - dev_err(dev, "Failed to create sysfs attributes!\n"); -} - -void gr_gp10b_remove_sysfs(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - - if (!g->ecc.gr.sm_lrf_single_err_count.counters) - return; - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_lrf_single_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_lrf_double_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_shm_sec_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_shm_sed_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_shm_ded_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_total_sec_pipe0_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_total_ded_pipe0_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_unique_sec_pipe0_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_unique_ded_pipe0_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_total_sec_pipe1_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_total_ded_pipe1_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_unique_sec_pipe1_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.tex_unique_ded_pipe1_count); - - nvgpu_gr_ecc_stat_remove(dev, - 2, - &g->ecc.ltc.l2_sec_count); - - nvgpu_gr_ecc_stat_remove(dev, - 2, - &g->ecc.ltc.l2_ded_count); -} diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h index 6de90275..85b46b9a 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h @@ -18,6 +18,5 @@ #define _PLATFORM_GP10B_TEGRA_H_ 
#include "gp10b/gr_gp10b.h" -#include "platform_ecc_sysfs.h" #endif diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c index d62e7932..c9c13197 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c @@ -39,7 +39,6 @@ #include "platform_gp10b.h" #include "platform_gp10b_tegra.h" -#include "platform_ecc_sysfs.h" #include "os_linux.h" #include "platform_gk20a_tegra.h" @@ -94,11 +93,6 @@ static int gv11b_tegra_late_probe(struct device *dev) static int gv11b_tegra_remove(struct device *dev) { - struct gk20a *g = get_gk20a(dev); - - if (g->ops.gr.remove_gr_sysfs) - g->ops.gr.remove_gr_sysfs(g); - gv11b_tegra_scale_exit(dev); #ifdef CONFIG_TEGRA_GK20A_NVHOST @@ -261,328 +255,3 @@ struct gk20a_platform gv11b_tegra_platform = { .secure_buffer_size = 667648, }; - -void gr_gv11b_create_sysfs(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - int error = 0; - - /* This stat creation function is called on GR init. GR can get - initialized multiple times but we only need to create the ECC - stats once. Therefore, add the following check to avoid - creating duplicate stat sysfs nodes. 
*/ - if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL) - return; - - gr_gp10b_create_sysfs(g); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_l1_tag_ecc_corrected_err_count", - &g->ecc.gr.sm_l1_tag_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_l1_tag_ecc_uncorrected_err_count", - &g->ecc.gr.sm_l1_tag_uncorrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_cbu_ecc_corrected_err_count", - &g->ecc.gr.sm_cbu_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_cbu_ecc_uncorrected_err_count", - &g->ecc.gr.sm_cbu_uncorrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_l1_data_ecc_corrected_err_count", - &g->ecc.gr.sm_l1_data_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_l1_data_ecc_uncorrected_err_count", - &g->ecc.gr.sm_l1_data_uncorrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_icache_ecc_corrected_err_count", - &g->ecc.gr.sm_icache_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "sm_icache_ecc_uncorrected_err_count", - &g->ecc.gr.sm_icache_uncorrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "gcc_l15_ecc_corrected_err_count", - &g->ecc.gr.gcc_l15_corrected_err_count); - - error |= nvgpu_gr_ecc_stat_create(dev, - 0, - "gcc_l15_ecc_uncorrected_err_count", - &g->ecc.gr.gcc_l15_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->ltc_count, - 0, - "ltc", - NULL, - "l2_cache_uncorrected_err_count", - &g->ecc.ltc.l2_cache_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->ltc_count, - 0, - "ltc", - NULL, - "l2_cache_corrected_err_count", - &g->ecc.ltc.l2_cache_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "gpc", - NULL, - "fecs_ecc_uncorrected_err_count", - &g->ecc.gr.fecs_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "gpc", - NULL, - "fecs_ecc_corrected_err_count", - 
&g->ecc.gr.fecs_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->gr.gpc_count, - 0, - "gpc", - NULL, - "gpccs_ecc_uncorrected_err_count", - &g->ecc.gr.gpccs_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->gr.gpc_count, - 0, - "gpc", - NULL, - "gpccs_ecc_corrected_err_count", - &g->ecc.gr.gpccs_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->gr.gpc_count, - 0, - "gpc", - NULL, - "mmu_l1tlb_ecc_uncorrected_err_count", - &g->ecc.gr.mmu_l1tlb_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - g->gr.gpc_count, - 0, - "gpc", - NULL, - "mmu_l1tlb_ecc_corrected_err_count", - &g->ecc.gr.mmu_l1tlb_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_l2tlb_ecc_uncorrected_err_count", - &g->ecc.fb.mmu_l2tlb_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_l2tlb_ecc_corrected_err_count", - &g->ecc.fb.mmu_l2tlb_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_hubtlb_ecc_uncorrected_err_count", - &g->ecc.fb.mmu_hubtlb_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_hubtlb_ecc_corrected_err_count", - &g->ecc.fb.mmu_hubtlb_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_fillunit_ecc_uncorrected_err_count", - &g->ecc.fb.mmu_fillunit_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "mmu_fillunit_ecc_corrected_err_count", - &g->ecc.fb.mmu_fillunit_corrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "pmu_ecc_uncorrected_err_count", - &g->ecc.pmu.pmu_uncorrected_err_count); - - error |= nvgpu_ecc_stat_create(dev, - 1, - 0, - "eng", - NULL, - "pmu_ecc_corrected_err_count", - &g->ecc.pmu.pmu_corrected_err_count); - - if (error) - dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); -} - -void 
gr_gv11b_remove_sysfs(struct gk20a *g) -{ - struct device *dev = dev_from_gk20a(g); - - if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters) - return; - gr_gp10b_remove_sysfs(g); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_l1_tag_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_l1_tag_uncorrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_cbu_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_cbu_uncorrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_l1_data_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_l1_data_uncorrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_icache_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.sm_icache_uncorrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.gcc_l15_corrected_err_count); - - nvgpu_gr_ecc_stat_remove(dev, - 0, - &g->ecc.gr.gcc_l15_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->ltc_count, - 0, - &g->ecc.ltc.l2_cache_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->ltc_count, - 0, - &g->ecc.ltc.l2_cache_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.gr.fecs_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.gr.fecs_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->gr.gpc_count, - 0, - &g->ecc.gr.gpccs_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->gr.gpc_count, - 0, - &g->ecc.gr.gpccs_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->gr.gpc_count, - 0, - &g->ecc.gr.mmu_l1tlb_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - g->gr.gpc_count, - 0, - &g->ecc.gr.mmu_l1tlb_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_l2tlb_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_l2tlb_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - 
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_hubtlb_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_fillunit_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.fb.mmu_fillunit_corrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.pmu.pmu_uncorrected_err_count); - - nvgpu_ecc_stat_remove(dev, - 1, - 0, - &g->ecc.pmu.pmu_corrected_err_count); -} diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index 090ac7b4..fc0f9c84 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c @@ -215,10 +215,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .update_boosted_ctx = NULL, .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, -#ifdef CONFIG_SYSFS - .create_gr_sysfs = gr_gp10b_create_sysfs, - .remove_gr_sysfs = gr_gp10b_remove_sysfs, -#endif .set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode, .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index 5da9fed5..dbd00c23 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c @@ -232,10 +232,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .update_boosted_ctx = NULL, .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, .set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4, -#ifdef CONFIG_SYSFS - .create_gr_sysfs = gr_gv11b_create_sysfs, - .remove_gr_sysfs = gr_gv11b_remove_sysfs, -#endif .set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode, .is_etpc_addr = gv11b_gr_pri_is_etpc_addr, .egpc_etpc_priv_addr_table = gv11b_gr_egpc_etpc_priv_addr_table, -- cgit v1.2.2