From 7f14aafc2c02eb0fab458324d0ba91a7fdea3086 Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Tue, 26 Jun 2018 17:37:40 -0700 Subject: gpu: nvgpu: rework ecc structure and sysfs - create common file common/ecc.c, which includes common functions for adding and removing ecc counters. - common code will create a list of all counters, which makes it easier to iterate over all counters. - add chip-specific file for adding ecc counters. - add linux-specific file os/linux/ecc_sysfs.c to export counters to sysfs. - remove obsolete code - MISRA violation for using snprintf is not solved; it is tracked in Jira NVGPU-859 Jira NVGPUT-115 Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4 Signed-off-by: Richard Zhao Reviewed-on: https://git-master.nvidia.com/r/1763536 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gp10b/ecc_gp10b.c | 106 ++++++++++++++++++++++++++++++++++++ drivers/gpu/nvgpu/gp10b/ecc_gp10b.h | 28 ++++++++++ drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 26 ++++----- drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 6 +- drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 4 +- 5 files changed, 151 insertions(+), 19 deletions(-) create mode 100644 drivers/gpu/nvgpu/gp10b/ecc_gp10b.c create mode 100644 drivers/gpu/nvgpu/gp10b/ecc_gp10b.h (limited to 'drivers/gpu/nvgpu/gp10b') diff --git a/drivers/gpu/nvgpu/gp10b/ecc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ecc_gp10b.c new file mode 100644 index 00000000..cf95c0d7 --- /dev/null +++ b/drivers/gpu/nvgpu/gp10b/ecc_gp10b.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include /* NOTE(review): the header name after this #include is missing in this rendering (angle-bracket text appears to have been stripped) - restore it from the original commit */ + +#include "gk20a/gk20a.h" +#include "gp10b/ecc_gp10b.h" + +int gp10b_ecc_init(struct gk20a *g) /* Set up the gp10b ECC counters: 13 per-TPC counters (SM LRF, SM SHM, TEX pipe0/pipe1) followed by 2 per-LTS counters. Returns 0 when every init macro returns 0, otherwise the first non-zero result. */ +{ + int err = 0; + + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_single_err_count); /* NOTE(review): the init macros are passed only a counter name; presumably they pick up the local g - confirm in the macro definition */ + if (err != 0) { + goto done; /* stop at the first failure; remaining counters are not attempted */ + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_lrf_ecc_double_err_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sec_count); /* SM SHM counters: sec, sed, ded */ + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_sed_count); + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(sm_shm_ecc_ded_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe0_count); /* TEX pipe0: total sec/ded */ + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe0_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe0_count); /* TEX pipe0: unique sec/ded */ + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe0_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_sec_pipe1_count); /* TEX pipe1: total sec/ded */ + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_ecc_total_ded_pipe1_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_sec_pipe1_count); /* TEX pipe1: unique sec/ded */ + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_TPC(tex_unique_ecc_ded_pipe1_count); + if (err != 0) { + goto done; + } + + err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_sec_count); /* the two per-LTS counters use the PER_LTS macro instead of PER_TPC */ + if (err != 0) { + goto done; + } + err = NVGPU_ECC_COUNTER_INIT_PER_LTS(ecc_ded_count); + if (err != 0) { + goto done; + } + +done: /* reached on both success and failure; the cleanup below runs only when err is non-zero */ + if (err != 0) { + nvgpu_err(g, "ecc counter allocate failed, err=%d", err); + nvgpu_ecc_free(g); /* NOTE(review): presumably releases the counters set up by the earlier successful calls - confirm against nvgpu_ecc_free */ + } + + return err; +} diff --git a/drivers/gpu/nvgpu/gp10b/ecc_gp10b.h b/drivers/gpu/nvgpu/gp10b/ecc_gp10b.h new file mode 100644 index 00000000..e5101db0 --- /dev/null +++ b/drivers/gpu/nvgpu/gp10b/ecc_gp10b.h @@ 
-0,0 +1,28 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __ECC_GP10B_H__ +#define __ECC_GP10B_H__ + +int gp10b_ecc_init(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 16eddeca..17c4e8b7 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -176,7 +176,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g, lrf_ecc_ded_status, &lrf_single_count_delta, lrf_double_count_delta); - g->ecc.gr.sm_lrf_single_err_count.counters[tpc] += + g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter += lrf_single_count_delta; } if (lrf_ecc_ded_status) { @@ -188,7 +188,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g, lrf_ecc_ded_status, &lrf_double_count_delta, lrf_single_count_delta); - g->ecc.gr.sm_lrf_double_err_count.counters[tpc] += + g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter += lrf_double_count_delta; } gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, @@ -213,9 +213,9 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); - g->ecc.gr.sm_shm_sec_count.counters[tpc] += + g->ecc.gr.sm_shm_ecc_sec_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val); - g->ecc.gr.sm_shm_sed_count.counters[tpc] += + g->ecc.gr.sm_shm_ecc_sed_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() | gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m()); @@ -235,7 +235,7 @@ int gr_gp10b_handle_sm_exception(struct gk20a *g, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); - g->ecc.gr.sm_shm_ded_count.counters[tpc] += + g->ecc.gr.sm_shm_ecc_ded_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val); ecc_stats_reg_val &= 
~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m()); gk20a_writel(g, @@ -276,7 +276,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); - g->ecc.gr.tex_total_sec_pipe0_count.counters[tpc] += + g->ecc.gr.tex_ecc_total_sec_pipe0_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(); gk20a_writel(g, @@ -285,7 +285,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); - g->ecc.gr.tex_unique_sec_pipe0_count.counters[tpc] += + g->ecc.gr.tex_unique_ecc_sec_pipe0_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(); gk20a_writel(g, @@ -300,7 +300,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); - g->ecc.gr.tex_total_sec_pipe1_count.counters[tpc] += + g->ecc.gr.tex_ecc_total_sec_pipe1_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(); gk20a_writel(g, @@ -309,7 +309,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); - g->ecc.gr.tex_unique_sec_pipe1_count.counters[tpc] += + g->ecc.gr.tex_unique_ecc_sec_pipe1_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(); gk20a_writel(g, @@ -332,7 +332,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, 
gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); - g->ecc.gr.tex_total_ded_pipe0_count.counters[tpc] += + g->ecc.gr.tex_ecc_total_ded_pipe0_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(); gk20a_writel(g, @@ -341,7 +341,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); - g->ecc.gr.tex_unique_ded_pipe0_count.counters[tpc] += + g->ecc.gr.tex_unique_ecc_ded_pipe0_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); gk20a_writel(g, @@ -356,7 +356,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); - g->ecc.gr.tex_total_ded_pipe1_count.counters[tpc] += + g->ecc.gr.tex_ecc_total_ded_pipe1_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(); gk20a_writel(g, @@ -365,7 +365,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, ecc_stats_reg_val = gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); - g->ecc.gr.tex_unique_ded_pipe1_count.counters[tpc] += + g->ecc.gr.tex_unique_ecc_ded_pipe1_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); gk20a_writel(g, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 94adf727..d32f644d 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -56,6 +56,7 @@ #include "gp10b/regops_gp10b.h" #include "gp10b/therm_gp10b.h" #include "gp10b/priv_ring_gp10b.h" +#include "gp10b/ecc_gp10b.h" 
#include "gm20b/ltc_gm20b.h" #include "gm20b/gr_gm20b.h" @@ -339,11 +340,8 @@ static const struct gpu_ops gp10b_ops = { .init_preemption_state = gr_gp10b_init_preemption_state, .update_boosted_ctx = gr_gp10b_update_boosted_ctx, .set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, -#ifdef CONFIG_SYSFS - .create_gr_sysfs = gr_gp10b_create_sysfs, - .remove_gr_sysfs = gr_gp10b_remove_sysfs, -#endif .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode, + .init_ecc = gp10b_ecc_init, .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, .init_gfxp_wfi_timeout_count = gr_gp10b_init_gfxp_wfi_timeout_count, diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c index 1e5807d5..aeeda4a8 100644 --- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c @@ -249,7 +249,7 @@ void gp10b_ltc_isr(struct gk20a *g) ecc_stats_reg_val = gk20a_readl(g, ltc_ltc0_lts0_dstg_ecc_report_r() + offset); - g->ecc.ltc.l2_sec_count.counters[ltc*g->ltc_count + slice] += + g->ecc.ltc.ecc_sec_count[ltc][slice].counter += ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m()); @@ -268,7 +268,7 @@ void gp10b_ltc_isr(struct gk20a *g) ecc_stats_reg_val = gk20a_readl(g, ltc_ltc0_lts0_dstg_ecc_report_r() + offset); - g->ecc.ltc.l2_ded_count.counters[ltc*g->ltc_count + slice] += + g->ecc.ltc.ecc_ded_count[ltc][slice].counter += ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m()); -- cgit v1.2.2