From 7f14aafc2c02eb0fab458324d0ba91a7fdea3086 Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Tue, 26 Jun 2018 17:37:40 -0700 Subject: gpu: nvgpu: rework ecc structure and sysfs - create common file common/ecc.c which includes common functions for adding and removing ecc counters. - common code will create a list of all counters, which makes it easier to iterate over all counters. - Add chip specific file for adding ecc counters. - add linux specific file os/linux/ecc_sysfs.c to export counters to sysfs. - remove obsolete code - MISRA violation for using snprintf is not yet resolved; it is tracked in jira NVGPU-859 Jira NVGPUT-115 Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4 Signed-off-by: Richard Zhao Reviewed-on: https://git-master.nvidia.com/r/1763536 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/ecc_gk20a.h | 102 ------------------------------------ drivers/gpu/nvgpu/gk20a/gk20a.h | 8 ++- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 8 ++- 3 files changed, 9 insertions(+), 109 deletions(-) delete mode 100644 drivers/gpu/nvgpu/gk20a/ecc_gk20a.h (limited to 'drivers/gpu/nvgpu/gk20a') diff --git a/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h b/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h deleted file mode 100644 index 9c50a809..00000000 --- a/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * GK20A ECC - * - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ -#ifndef ECC_GK20A_H -#define ECC_GK20A_H - -struct gk20a_ecc_stat { - char **names; - u32 *counters; - u32 count; -#ifdef CONFIG_SYSFS - struct hlist_node hash_node; - struct device_attribute *attr_array; -#endif -}; - -struct ecc_gk20a { - /* Stats per engine */ - struct { - struct gk20a_ecc_stat sm_lrf_single_err_count; - struct gk20a_ecc_stat sm_lrf_double_err_count; - - struct gk20a_ecc_stat sm_shm_sec_count; - struct gk20a_ecc_stat sm_shm_sed_count; - struct gk20a_ecc_stat sm_shm_ded_count; - - struct gk20a_ecc_stat tex_total_sec_pipe0_count; - struct gk20a_ecc_stat tex_total_ded_pipe0_count; - struct gk20a_ecc_stat tex_unique_sec_pipe0_count; - struct gk20a_ecc_stat tex_unique_ded_pipe0_count; - struct gk20a_ecc_stat tex_total_sec_pipe1_count; - struct gk20a_ecc_stat tex_total_ded_pipe1_count; - struct gk20a_ecc_stat tex_unique_sec_pipe1_count; - struct gk20a_ecc_stat tex_unique_ded_pipe1_count; - - struct gk20a_ecc_stat sm_l1_tag_corrected_err_count; - struct gk20a_ecc_stat sm_l1_tag_uncorrected_err_count; - struct gk20a_ecc_stat sm_cbu_corrected_err_count; - struct gk20a_ecc_stat sm_cbu_uncorrected_err_count; - struct gk20a_ecc_stat sm_l1_data_corrected_err_count; - struct gk20a_ecc_stat sm_l1_data_uncorrected_err_count; - struct gk20a_ecc_stat sm_icache_corrected_err_count; - struct gk20a_ecc_stat sm_icache_uncorrected_err_count; - struct gk20a_ecc_stat gcc_l15_corrected_err_count; - struct gk20a_ecc_stat gcc_l15_uncorrected_err_count; - struct gk20a_ecc_stat fecs_corrected_err_count; - struct gk20a_ecc_stat fecs_uncorrected_err_count; - struct gk20a_ecc_stat gpccs_corrected_err_count; - struct gk20a_ecc_stat gpccs_uncorrected_err_count; - struct gk20a_ecc_stat mmu_l1tlb_corrected_err_count; - struct gk20a_ecc_stat mmu_l1tlb_uncorrected_err_count; - } gr; - - struct { - struct gk20a_ecc_stat l2_sec_count; - struct gk20a_ecc_stat l2_ded_count; - struct gk20a_ecc_stat l2_cache_corrected_err_count; - struct gk20a_ecc_stat l2_cache_uncorrected_err_count; 
- } ltc; - - struct { - struct gk20a_ecc_stat mmu_l2tlb_corrected_err_count; - struct gk20a_ecc_stat mmu_l2tlb_uncorrected_err_count; - struct gk20a_ecc_stat mmu_hubtlb_corrected_err_count; - struct gk20a_ecc_stat mmu_hubtlb_uncorrected_err_count; - struct gk20a_ecc_stat mmu_fillunit_corrected_err_count; - struct gk20a_ecc_stat mmu_fillunit_uncorrected_err_count; - } fb; - - struct { - struct gk20a_ecc_stat pmu_corrected_err_count; - struct gk20a_ecc_stat pmu_uncorrected_err_count; - } pmu; - - struct { - struct gk20a_ecc_stat fbpa_sec_err_count; - struct gk20a_ecc_stat fbpa_ded_err_count; - } fbpa; - -}; - -#endif /*__ECC_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 7cb8462f..e69036d7 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -35,7 +35,6 @@ struct gk20a_ctxsw_trace; struct acr_desc; struct nvgpu_mem_alloc_tracker; struct dbg_profiler_object_data; -struct ecc_gk20a; struct gk20a_debug_output; struct nvgpu_clk_pll_debug_data; struct nvgpu_nvhost_dev; @@ -64,6 +63,7 @@ struct nvgpu_ctxsw_trace_filter; #include #include #include +#include #include "clk_gk20a.h" #include "ce2_gk20a.h" @@ -77,7 +77,6 @@ struct nvgpu_ctxsw_trace_filter; #include "perf/perf.h" #include "pmgr/pmgr.h" #include "therm/thrm.h" -#include "ecc_gk20a.h" /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. 32 ns is the resolution of ptimer. 
*/ @@ -384,8 +383,7 @@ struct gpu_ops { u32 gpc_exception); void (*enable_gpc_exceptions)(struct gk20a *g); void (*enable_exceptions)(struct gk20a *g); - void (*create_gr_sysfs)(struct gk20a *g); - void (*remove_gr_sysfs)(struct gk20a *g); + int (*init_ecc)(struct gk20a *g); u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g); int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, struct channel_gk20a *fault_ch); @@ -1385,7 +1383,7 @@ struct gk20a { struct mm_gk20a mm; struct nvgpu_pmu pmu; struct acr_desc acr; - struct ecc_gk20a ecc; + struct nvgpu_ecc ecc; struct clk_pmupstate clk_pmu; struct perf_pmupstate perf_pmu; struct pmgr_pmupstate pmgr_pmu; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index c70c1cd4..38570041 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "gk20a.h" #include "gr_gk20a.h" @@ -3127,6 +3128,8 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; gk20a_comptag_allocator_destroy(g, &gr->comp_tags); + + nvgpu_ecc_remove_support(g); } static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) @@ -4872,8 +4875,9 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) gr->remove_support = gk20a_remove_gr_support; gr->sw_ready = true; - if (g->ops.gr.create_gr_sysfs) - g->ops.gr.create_gr_sysfs(g); + err = nvgpu_ecc_init_support(g); + if (err) + goto clean_up; nvgpu_log_fn(g, "done"); return 0; -- cgit v1.2.2