From c771d0b979cd9f42a21da520d5010873d2a6aa47 Mon Sep 17 00:00:00 2001 From: David Nieto Date: Thu, 18 May 2017 16:45:40 -0700 Subject: gpu: nvgpu: add GPC parity counters (1) Re-arrange the structure for ecc counters reporting so multiple units can be managed (2) Add counters and handling for additional GPC counters JIRA: GPUT19X-84 Change-Id: I74fd474d7daf7590fc7f7ddc9837bb692512d208 Signed-off-by: David Nieto Reviewed-on: http://git-master/r/1485277 Reviewed-by: svccoveritychecker GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gv11b/ecc_gv11b.h | 36 +++++++++ drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 103 ++++++++++++++++++++----- drivers/gpu/nvgpu/gv11b/gr_gv11b.h | 15 ---- drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c | 94 +++++++++++++++++----- 4 files changed, 192 insertions(+), 56 deletions(-) create mode 100644 drivers/gpu/nvgpu/gv11b/ecc_gv11b.h (limited to 'drivers/gpu/nvgpu/gv11b') diff --git a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h new file mode 100644 index 00000000..6b471655 --- /dev/null +++ b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h @@ -0,0 +1,36 @@ +/* + * GV11B GPU ECC + * + * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _NVGPU_ECC_GV11B_H_ +#define _NVGPU_ECC_GV11B_H_ + +struct ecc_gr_t19x { + struct gk20a_ecc_stat sm_l1_tag_corrected_err_count; + struct gk20a_ecc_stat sm_l1_tag_uncorrected_err_count; + struct gk20a_ecc_stat sm_cbu_corrected_err_count; + struct gk20a_ecc_stat sm_cbu_uncorrected_err_count; + struct gk20a_ecc_stat sm_l1_data_corrected_err_count; + struct gk20a_ecc_stat sm_l1_data_uncorrected_err_count; + struct gk20a_ecc_stat sm_icache_corrected_err_count; + struct gk20a_ecc_stat sm_icache_uncorrected_err_count; + struct gk20a_ecc_stat gcc_l15_corrected_err_count; + struct gk20a_ecc_stat gcc_l15_uncorrected_err_count; + struct gk20a_ecc_stat fecs_corrected_err_count; + struct gk20a_ecc_stat fecs_uncorrected_err_count; + struct gk20a_ecc_stat gpccs_corrected_err_count; + struct gk20a_ecc_stat gpccs_uncorrected_err_count; +}; + +#endif diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 764374cc..8b4471ca 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -163,7 +163,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, l1_tag_corrected_err_count_delta += (is_l1_tag_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_l1_tag_corrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_l1_tag_corrected_err_count.counters[tpc] += l1_tag_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, @@ -178,7 +178,7 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, l1_tag_uncorrected_err_count_delta += (is_l1_tag_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_l1_tag_uncorrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_l1_tag_uncorrected_err_count.counters[tpc] += l1_tag_uncorrected_err_count_delta; gk20a_writel(g, 
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, @@ -255,7 +255,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, lrf_corrected_err_count_delta += (is_lrf_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_total_s()); - g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] += + g->ecc.gr.t18x.sm_lrf_single_err_count.counters[tpc] += lrf_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset, @@ -270,7 +270,7 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, lrf_uncorrected_err_count_delta += (is_lrf_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_total_s()); - g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] += + g->ecc.gr.t18x.sm_lrf_double_err_count.counters[tpc] += lrf_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, @@ -339,7 +339,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, cbu_corrected_err_count_delta += (is_cbu_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_cbu_corrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_cbu_corrected_err_count.counters[tpc] += cbu_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset, @@ -354,7 +354,7 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, cbu_uncorrected_err_count_delta += (is_cbu_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_cbu_uncorrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_cbu_uncorrected_err_count.counters[tpc] += cbu_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, @@ -419,7 +419,7 @@ static int 
gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, l1_data_corrected_err_count_delta += (is_l1_data_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_l1_data_corrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_l1_data_corrected_err_count.counters[tpc] += l1_data_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset, @@ -434,7 +434,7 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, l1_data_uncorrected_err_count_delta += (is_l1_data_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_l1_data_uncorrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_l1_data_uncorrected_err_count.counters[tpc] += l1_data_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, @@ -503,7 +503,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, icache_corrected_err_count_delta += (is_icache_ecc_corrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_icache_corrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_icache_corrected_err_count.counters[tpc] += icache_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset, @@ -518,7 +518,7 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, icache_uncorrected_err_count_delta += (is_icache_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_total_s()); - g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count.counters[tpc] += + g->ecc.gr.t19x.sm_icache_uncorrected_err_count.counters[tpc] += icache_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset, @@ -606,7 
+606,7 @@ static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, gcc_l15_corrected_err_count_delta += (is_gcc_l15_ecc_corrected_total_err_overflow << gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_total_s()); - g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count.counters[gpc] += + g->ecc.gr.t19x.gcc_l15_corrected_err_count.counters[gpc] += gcc_l15_corrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset, @@ -621,7 +621,7 @@ static int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, gcc_l15_uncorrected_err_count_delta += (is_gcc_l15_ecc_uncorrected_total_err_overflow << gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_total_s()); - g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count.counters[gpc] += + g->ecc.gr.t19x.gcc_l15_uncorrected_err_count.counters[gpc] += gcc_l15_uncorrected_err_count_delta; gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset, @@ -639,6 +639,9 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, { int ret = 0; u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; + u32 corrected_delta, uncorrected_delta; + u32 corrected_overflow, uncorrected_overflow; + int hww_esr; u32 offset = proj_gpc_stride_v() * gpc; @@ -657,10 +660,34 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, uncorrected_cnt = gk20a_readl(g, gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_r() + offset); + corrected_delta = gr_gpc0_gpccs_falcon_ecc_corrected_err_count_total_v( + corrected_cnt); + uncorrected_delta = gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_total_v( + uncorrected_cnt); + corrected_overflow = ecc_status & + gr_gpc0_gpccs_falcon_ecc_status_corrected_err_total_counter_overflow_m(); + + uncorrected_overflow = ecc_status & + gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_total_counter_overflow_m(); + + /* clear the interrupt */ + if ((corrected_delta > 0) || corrected_overflow) + gk20a_writel(g, + 
gr_gpc0_gpccs_falcon_ecc_corrected_err_count_r() + + offset, 0); + if ((uncorrected_delta > 0) || uncorrected_overflow) + gk20a_writel(g, + gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_r() + + offset, 0); + gk20a_writel(g, gr_gpc0_gpccs_falcon_ecc_status_r() + offset, gr_gpc0_gpccs_falcon_ecc_status_reset_task_f()); + g->ecc.gr.t19x.gpccs_corrected_err_count.counters[gpc] += + corrected_delta; + g->ecc.gr.t19x.gpccs_uncorrected_err_count.counters[gpc] += + uncorrected_delta; nvgpu_log(g, gpu_dbg_intr, "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); @@ -675,6 +702,8 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, if (ecc_status & gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m()) nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); + if (corrected_overflow || uncorrected_overflow) + nvgpu_info(g, "gpccs ecc counter overflow!"); nvgpu_log(g, gpu_dbg_intr, "ecc error row address: 0x%x", @@ -682,8 +711,8 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, nvgpu_log(g, gpu_dbg_intr, "ecc error count corrected: %d, uncorrected %d", - gr_gpc0_gpccs_falcon_ecc_corrected_err_count_total_v(corrected_cnt), - gr_gpc0_gpccs_falcon_ecc_uncorrected_err_count_total_v(uncorrected_cnt)); + g->ecc.gr.t19x.gpccs_corrected_err_count.counters[gpc], + g->ecc.gr.t19x.gpccs_uncorrected_err_count.counters[gpc]); return ret; } @@ -710,8 +739,8 @@ static void gr_gv11b_enable_gpc_exceptions(struct gk20a *g) gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->tpc_count) - 1); gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), - (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) - gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1)); + (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) | + gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1))); } static int gr_gv11b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, @@ -1690,6 +1719,8 @@ static int gr_gv11b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) static void 
gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) { u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; + u32 corrected_delta, uncorrected_delta; + u32 corrected_overflow, uncorrected_overflow; if (intr & (gr_fecs_host_int_status_ecc_uncorrected_m() | gr_fecs_host_int_status_ecc_corrected_m())) { @@ -1701,10 +1732,42 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) uncorrected_cnt = gk20a_readl(g, gr_fecs_falcon_ecc_uncorrected_err_count_r()); + corrected_delta = + gr_fecs_falcon_ecc_corrected_err_count_total_v( + corrected_cnt); + uncorrected_delta = + gr_fecs_falcon_ecc_uncorrected_err_count_total_v( + uncorrected_cnt); + + corrected_overflow = ecc_status & + gr_fecs_falcon_ecc_status_corrected_err_total_counter_overflow_m(); + uncorrected_overflow = ecc_status & + gr_fecs_falcon_ecc_status_uncorrected_err_total_counter_overflow_m(); + + /* clear the interrupt */ + if ((corrected_delta > 0) || corrected_overflow) + gk20a_writel(g, + gr_fecs_falcon_ecc_corrected_err_count_r(), 0); + if ((uncorrected_delta > 0) || uncorrected_overflow) + gk20a_writel(g, + gr_fecs_falcon_ecc_uncorrected_err_count_r(), + 0); + + + /* clear the interrupt */ + gk20a_writel(g, gr_fecs_falcon_ecc_uncorrected_err_count_r(), + 0); + gk20a_writel(g, gr_fecs_falcon_ecc_corrected_err_count_r(), 0); + /* clear the interrupt */ gk20a_writel(g, gr_fecs_falcon_ecc_status_r(), gr_fecs_falcon_ecc_status_reset_task_f()); + g->ecc.gr.t19x.fecs_corrected_err_count.counters[0] += + corrected_delta; + g->ecc.gr.t19x.fecs_uncorrected_err_count.counters[0] += + uncorrected_delta; + nvgpu_log(g, gpu_dbg_intr, "fecs ecc interrupt intr: 0x%x", intr); @@ -1722,6 +1785,8 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) gr_fecs_falcon_ecc_status_uncorrected_err_dmem_m()) nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); + if (corrected_overflow || uncorrected_overflow) + nvgpu_info(g, "fecs ecc counter overflow!"); nvgpu_log(g, gpu_dbg_intr, 
"ecc error row address: 0x%x", @@ -1729,10 +1794,8 @@ static void gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) nvgpu_log(g, gpu_dbg_intr, "ecc error count corrected: %d, uncorrected %d", - gr_fecs_falcon_ecc_corrected_err_count_total_v( - corrected_cnt), - gr_fecs_falcon_ecc_uncorrected_err_count_total_v( - uncorrected_cnt)); + g->ecc.gr.t19x.fecs_corrected_err_count.counters[0], + g->ecc.gr.t19x.fecs_uncorrected_err_count.counters[0]); } } diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index cf3842b6..9283a597 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h @@ -35,21 +35,6 @@ enum { VOLTA_DMA_COPY_A = 0xC3B5, }; -struct gr_t19x { - struct { - struct gr_gp10b_ecc_stat sm_l1_tag_corrected_err_count; - struct gr_gp10b_ecc_stat sm_l1_tag_uncorrected_err_count; - struct gr_gp10b_ecc_stat sm_cbu_corrected_err_count; - struct gr_gp10b_ecc_stat sm_cbu_uncorrected_err_count; - struct gr_gp10b_ecc_stat sm_l1_data_corrected_err_count; - struct gr_gp10b_ecc_stat sm_l1_data_uncorrected_err_count; - struct gr_gp10b_ecc_stat sm_icache_corrected_err_count; - struct gr_gp10b_ecc_stat sm_icache_uncorrected_err_count; - struct gr_gp10b_ecc_stat gcc_l15_corrected_err_count; - struct gr_gp10b_ecc_stat gcc_l15_uncorrected_err_count; - } ecc_stats; -}; - #define NVC397_SET_SHADER_EXCEPTIONS 0x1528 #define NVC397_SET_CIRCULAR_BUFFER_SIZE 0x1280 #define NVC397_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc diff --git a/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c index 39ae68eb..1cfa2ef2 100644 --- a/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c +++ b/drivers/gpu/nvgpu/gv11b/platform_gv11b_tegra.c @@ -134,6 +134,11 @@ static struct device_attribute *dev_attr_sm_icache_ecc_uncorrected_err_count_arr static struct device_attribute *dev_attr_gcc_l15_ecc_corrected_err_count_array; static struct device_attribute *dev_attr_gcc_l15_ecc_uncorrected_err_count_array; 
+static struct device_attribute *dev_attr_fecs_ecc_corrected_err_count_array; +static struct device_attribute *dev_attr_fecs_ecc_uncorrected_err_count_array; +static struct device_attribute *dev_attr_gpccs_ecc_corrected_err_count_array; +static struct device_attribute *dev_attr_gpccs_ecc_uncorrected_err_count_array; + void gr_gv11b_create_sysfs(struct device *dev) { struct gk20a *g = get_gk20a(dev); @@ -142,7 +147,7 @@ void gr_gv11b_create_sysfs(struct device *dev) initialized multiple times but we only need to create the ECC stats once. Therefore, add the following check to avoid creating duplicate stat sysfs nodes. */ - if (g->gr.t19x.ecc_stats.sm_l1_tag_corrected_err_count.counters != NULL) + if (g->ecc.gr.t19x.sm_l1_tag_corrected_err_count.counters != NULL) return; gr_gp10b_create_sysfs(dev); @@ -150,63 +155,91 @@ void gr_gv11b_create_sysfs(struct device *dev) error |= gr_gp10b_ecc_stat_create(dev, 0, "sm_l1_tag_ecc_corrected_err_count", - &g->gr.t19x.ecc_stats.sm_l1_tag_corrected_err_count, + &g->ecc.gr.t19x.sm_l1_tag_corrected_err_count, dev_attr_sm_l1_tag_ecc_corrected_err_count_array); error |= gr_gp10b_ecc_stat_create(dev, 0, "sm_l1_tag_ecc_uncorrected_err_count", - &g->gr.t19x.ecc_stats.sm_l1_tag_uncorrected_err_count, + &g->ecc.gr.t19x.sm_l1_tag_uncorrected_err_count, dev_attr_sm_l1_tag_ecc_uncorrected_err_count_array); error |= gr_gp10b_ecc_stat_create(dev, 0, "sm_cbu_ecc_corrected_err_count", - &g->gr.t19x.ecc_stats.sm_cbu_corrected_err_count, + &g->ecc.gr.t19x.sm_cbu_corrected_err_count, dev_attr_sm_cbu_ecc_corrected_err_count_array); error |= gr_gp10b_ecc_stat_create(dev, 0, "sm_cbu_ecc_uncorrected_err_count", - &g->gr.t19x.ecc_stats.sm_cbu_uncorrected_err_count, + &g->ecc.gr.t19x.sm_cbu_uncorrected_err_count, dev_attr_sm_cbu_ecc_uncorrected_err_count_array); error |= gr_gp10b_ecc_stat_create(dev, 0, "sm_l1_data_ecc_corrected_err_count", - &g->gr.t19x.ecc_stats.sm_l1_data_corrected_err_count, + &g->ecc.gr.t19x.sm_l1_data_corrected_err_count, 
dev_attr_sm_l1_data_ecc_corrected_err_count_array); error |= gr_gp10b_ecc_stat_create(dev, 0, "sm_l1_data_ecc_uncorrected_err_count", - &g->gr.t19x.ecc_stats.sm_l1_data_uncorrected_err_count, + &g->ecc.gr.t19x.sm_l1_data_uncorrected_err_count, dev_attr_sm_l1_data_ecc_uncorrected_err_count_array); error |= gr_gp10b_ecc_stat_create(dev, 0, "sm_icache_ecc_corrected_err_count", - &g->gr.t19x.ecc_stats.sm_icache_corrected_err_count, + &g->ecc.gr.t19x.sm_icache_corrected_err_count, dev_attr_sm_icache_ecc_corrected_err_count_array); error |= gr_gp10b_ecc_stat_create(dev, 0, "sm_icache_ecc_uncorrected_err_count", - &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count, + &g->ecc.gr.t19x.sm_icache_uncorrected_err_count, dev_attr_sm_icache_ecc_uncorrected_err_count_array); error |= gr_gp10b_ecc_stat_create(dev, 0, "gcc_l15_ecc_corrected_err_count", - &g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count, + &g->ecc.gr.t19x.gcc_l15_corrected_err_count, dev_attr_gcc_l15_ecc_corrected_err_count_array); error |= gr_gp10b_ecc_stat_create(dev, 0, "gcc_l15_ecc_uncorrected_err_count", - &g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count, + &g->ecc.gr.t19x.gcc_l15_uncorrected_err_count, dev_attr_gcc_l15_ecc_uncorrected_err_count_array); + error |= gp10b_ecc_stat_create(dev, + 1, + "gpc", + "fecs_ecc_uncorrected_err_count", + &g->ecc.gr.t19x.fecs_uncorrected_err_count, + dev_attr_fecs_ecc_uncorrected_err_count_array); + + error |= gp10b_ecc_stat_create(dev, + 1, + "gpc", + "fecs_ecc_corrected_err_count", + &g->ecc.gr.t19x.fecs_corrected_err_count, + dev_attr_fecs_ecc_corrected_err_count_array); + + error |= gp10b_ecc_stat_create(dev, + g->gr.gpc_count, + "gpc", + "gpccs_ecc_uncorrected_err_count", + &g->ecc.gr.t19x.gpccs_uncorrected_err_count, + dev_attr_gpccs_ecc_uncorrected_err_count_array); + + error |= gp10b_ecc_stat_create(dev, + g->gr.gpc_count, + "gpc", + "gpccs_ecc_corrected_err_count", + &g->ecc.gr.t19x.gpccs_corrected_err_count, + dev_attr_gpccs_ecc_corrected_err_count_array); + 
if (error) dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); } @@ -217,52 +250,71 @@ static void gr_gv11b_remove_sysfs(struct device *dev) gr_gp10b_ecc_stat_remove(dev, 0, - &g->gr.t19x.ecc_stats.sm_l1_tag_corrected_err_count, + &g->ecc.gr.t19x.sm_l1_tag_corrected_err_count, dev_attr_sm_l1_tag_ecc_corrected_err_count_array); gr_gp10b_ecc_stat_remove(dev, 0, - &g->gr.t19x.ecc_stats.sm_l1_tag_uncorrected_err_count, + &g->ecc.gr.t19x.sm_l1_tag_uncorrected_err_count, dev_attr_sm_l1_tag_ecc_uncorrected_err_count_array); gr_gp10b_ecc_stat_remove(dev, 0, - &g->gr.t19x.ecc_stats.sm_cbu_corrected_err_count, + &g->ecc.gr.t19x.sm_cbu_corrected_err_count, dev_attr_sm_cbu_ecc_corrected_err_count_array); gr_gp10b_ecc_stat_remove(dev, 0, - &g->gr.t19x.ecc_stats.sm_cbu_uncorrected_err_count, + &g->ecc.gr.t19x.sm_cbu_uncorrected_err_count, dev_attr_sm_cbu_ecc_uncorrected_err_count_array); gr_gp10b_ecc_stat_remove(dev, 0, - &g->gr.t19x.ecc_stats.sm_l1_data_corrected_err_count, + &g->ecc.gr.t19x.sm_l1_data_corrected_err_count, dev_attr_sm_l1_data_ecc_corrected_err_count_array); gr_gp10b_ecc_stat_remove(dev, 0, - &g->gr.t19x.ecc_stats.sm_l1_data_uncorrected_err_count, + &g->ecc.gr.t19x.sm_l1_data_uncorrected_err_count, dev_attr_sm_l1_data_ecc_uncorrected_err_count_array); gr_gp10b_ecc_stat_remove(dev, 0, - &g->gr.t19x.ecc_stats.sm_icache_corrected_err_count, + &g->ecc.gr.t19x.sm_icache_corrected_err_count, dev_attr_sm_icache_ecc_corrected_err_count_array); gr_gp10b_ecc_stat_remove(dev, 0, - &g->gr.t19x.ecc_stats.sm_icache_uncorrected_err_count, + &g->ecc.gr.t19x.sm_icache_uncorrected_err_count, dev_attr_sm_icache_ecc_uncorrected_err_count_array); gr_gp10b_ecc_stat_remove(dev, 0, - &g->gr.t19x.ecc_stats.gcc_l15_corrected_err_count, + &g->ecc.gr.t19x.gcc_l15_corrected_err_count, dev_attr_gcc_l15_ecc_corrected_err_count_array); gr_gp10b_ecc_stat_remove(dev, 0, - &g->gr.t19x.ecc_stats.gcc_l15_uncorrected_err_count, + &g->ecc.gr.t19x.gcc_l15_uncorrected_err_count, 
dev_attr_gcc_l15_ecc_uncorrected_err_count_array); + gp10b_ecc_stat_remove(dev, + 1, + &g->ecc.gr.t19x.fecs_uncorrected_err_count, + dev_attr_fecs_ecc_uncorrected_err_count_array); + + gp10b_ecc_stat_remove(dev, + 1, + &g->ecc.gr.t19x.fecs_corrected_err_count, + dev_attr_fecs_ecc_corrected_err_count_array); + + gp10b_ecc_stat_remove(dev, + g->gr.gpc_count, + &g->ecc.gr.t19x.gpccs_uncorrected_err_count, + dev_attr_gpccs_ecc_uncorrected_err_count_array); + + gp10b_ecc_stat_remove(dev, + g->gr.gpc_count, + &g->ecc.gr.t19x.gpccs_corrected_err_count, + dev_attr_gpccs_ecc_corrected_err_count_array); } -- cgit v1.2.2