From e9b03e903c10e1fce9daf5fa7e51b8c4a0b65c95 Mon Sep 17 00:00:00 2001 From: Adeel Raza Date: Fri, 11 Dec 2015 16:16:21 -0800 Subject: gpu: nvgpu: gp10b: add ECC stats sysfs nodes Add sysfs nodes for querying ECC single/double bit error counts. Bug 1699676 Change-Id: I6d5219facadaa17207ac759b88fe19077207d8f1 Signed-off-by: Adeel Raza Reviewed-on: http://git-master/r/935363 Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 145 +++++++++++ drivers/gpu/nvgpu/gp10b/gr_gp10b.h | 28 +++ drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 92 +++++++ drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h | 22 +- drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 33 ++- drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c | 328 +++++++++++++++++++++++++ 6 files changed, 644 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/nvgpu/gp10b') diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index c66dea92..90d0ce8d 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -80,6 +80,13 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, "Single bit error detected in SM LRF!"); + + g->gr.t18x.ecc_stats.sm_lrf_single_err_count.counters[tpc] += + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset); + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset, + 0); } if ( (lrf_ecc_status & gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) || @@ -92,6 +99,13 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, "Double bit error detected in SM LRF!"); + + g->gr.t18x.ecc_stats.sm_lrf_double_err_count.counters[tpc] += + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset); + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset, + 0); } gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, 
lrf_ecc_status); @@ -107,17 +121,42 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) || (shm_ecc_status & gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) ) { + u32 ecc_stats_reg_val; gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, "Single bit error detected in SM SHM!"); + + ecc_stats_reg_val = + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); + g->gr.t18x.ecc_stats.sm_shm_sec_count.counters[tpc] += + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val); + g->gr.t18x.ecc_stats.sm_shm_sed_count.counters[tpc] += + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() | + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m()); + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset, + ecc_stats_reg_val); } if ( (shm_ecc_status & gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) || (shm_ecc_status & gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) ) { + u32 ecc_stats_reg_val; gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, "Double bit error detected in SM SHM!"); + + ecc_stats_reg_val = + gk20a_readl(g, + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); + g->gr.t18x.ecc_stats.sm_shm_ded_count.counters[tpc] += + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m()); + gk20a_writel(g, + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset, + ecc_stats_reg_val); } gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset, shm_ecc_status); @@ -133,6 +172,7 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 offset = proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; u32 esr; + u32 ecc_stats_reg_val; 
gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); @@ -143,10 +183,114 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f()) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, "Single bit error detected in TEX!"); + + /* Pipe 0 counters */ + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, + gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f()); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); + g->gr.t18x.ecc_stats.tex_total_sec_pipe0_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset, + ecc_stats_reg_val); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); + g->gr.t18x.ecc_stats.tex_unique_sec_pipe0_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, + ecc_stats_reg_val); + + + /* Pipe 1 counters */ + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, + gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f()); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); + g->gr.t18x.ecc_stats.tex_total_sec_pipe1_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset, + ecc_stats_reg_val); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); + g->gr.t18x.ecc_stats.tex_unique_sec_pipe1_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(ecc_stats_reg_val); + ecc_stats_reg_val &= 
~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, + ecc_stats_reg_val); + + + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, + gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f()); } if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f()) { gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr, "Double bit error detected in TEX!"); + + /* Pipe 0 counters */ + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, + gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f()); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); + g->gr.t18x.ecc_stats.tex_total_ded_pipe0_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset, + ecc_stats_reg_val); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); + g->gr.t18x.ecc_stats.tex_unique_ded_pipe0_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, + ecc_stats_reg_val); + + + /* Pipe 1 counters */ + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, + gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f()); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset); + g->gr.t18x.ecc_stats.tex_total_ded_pipe1_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r() + offset, + ecc_stats_reg_val); + + ecc_stats_reg_val = gk20a_readl(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset); + 
g->gr.t18x.ecc_stats.tex_unique_ded_pipe1_count.counters[tpc] += + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(ecc_stats_reg_val); + ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, + ecc_stats_reg_val); + + + gk20a_writel(g, + gr_pri_gpc0_tpc0_tex_m_routing_r() + offset, + gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f()); } gk20a_writel(g, @@ -1594,4 +1738,5 @@ void gp10b_init_gr(struct gpu_ops *gops) gops->gr.pre_process_sm_exception = gr_gp10b_pre_process_sm_exception; gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error; + gops->gr.create_gr_sysfs = gr_gp10b_create_sysfs; } diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h index c35fb384..bd4b5879 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h @@ -35,6 +35,13 @@ enum { void gp10b_init_gr(struct gpu_ops *ops); int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size, struct mem_desc *mem); +void gr_gp10b_create_sysfs(struct platform_device *dev); + +struct ecc_stat { + char **names; + u32 *counters; + struct hlist_node hash_node; +}; struct gr_t18x { struct { @@ -47,6 +54,27 @@ struct gr_t18x { struct dentry *debugfs_dump_ctxsw_stats; } ctx_vars; + struct { + struct ecc_stat sm_lrf_single_err_count; + struct ecc_stat sm_lrf_double_err_count; + + struct ecc_stat sm_shm_sec_count; + struct ecc_stat sm_shm_sed_count; + struct ecc_stat sm_shm_ded_count; + + struct ecc_stat tex_total_sec_pipe0_count; + struct ecc_stat tex_total_ded_pipe0_count; + struct ecc_stat tex_unique_sec_pipe0_count; + struct ecc_stat tex_unique_ded_pipe0_count; + struct ecc_stat tex_total_sec_pipe1_count; + struct ecc_stat tex_total_ded_pipe1_count; + struct ecc_stat tex_unique_sec_pipe1_count; + struct ecc_stat tex_unique_ded_pipe1_count; + + struct ecc_stat l2_sec_count; + struct ecc_stat l2_ded_count; + } ecc_stats; + int cilp_preempt_pending_chid; }; diff --git 
a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h index b494482a..0480527c 100644 --- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h @@ -530,6 +530,98 @@ static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pe { return 0x200; } +static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r(void) +{ + return 0x005046bc; +} +static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r(void) +{ + return 0x005046c0; +} +static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r(void) +{ + return 0x005044a4; +} +static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m(void) +{ + return 0xff << 0; +} +static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(u32 r) +{ + return (r >> 0) & 0xff; +} +static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m(void) +{ + return 0xff << 8; +} +static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(u32 r) +{ + return (r >> 8) & 0xff; +} +static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m(void) +{ + return 0xff << 16; +} +static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(u32 r) +{ + return (r >> 16) & 0xff; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_routing_r(void) +{ + return 0x005042c4; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f(void) +{ + return 0x0; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe0_f(void) +{ + return 0x1; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_routing_sel_pipe1_f(void) +{ + return 0x2; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_r(void) +{ + return 0x00504218; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_m(void) +{ + return 0xffff << 0; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_sec_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_m(void) +{ + return 
0xffff << 16; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_total_ded_v(u32 r) +{ + return (r >> 16) & 0xffff; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r(void) +{ + return 0x005042ec; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_m(void) +{ + return 0xffff << 0; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_sec_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(void) +{ + return 0xffff << 16; +} +static inline u32 gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_v(u32 r) +{ + return (r >> 16) & 0xffff; +} static inline u32 gr_pri_be0_crop_status1_r(void) { return 0x00410134; diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h index 302c2243..5916f695 100644 --- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -314,6 +314,26 @@ static inline u32 ltc_ltc0_lts0_intr_r(void) { return 0x0014040c; } +static inline u32 ltc_ltc0_lts0_dstg_ecc_report_r(void) +{ + return 0x0014051c; +} +static inline u32 ltc_ltc0_lts0_dstg_ecc_report_sec_count_m(void) +{ + return 0xff << 0; +} +static inline u32 ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(u32 r) +{ + return (r >> 0) & 0xff; +} +static inline u32 ltc_ltc0_lts0_dstg_ecc_report_ded_count_m(void) +{ + return 0xff << 16; +} +static inline u32 ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(u32 r) +{ + return (r >> 16) & 0xff; +} static inline u32 ltc_ltcs_ltss_tstg_cmgmt0_r(void) { return 0x0017e2a0; diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c index d0be86a4..e68e762d 100644 --- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c @@ -133,21 +133,48 @@ static void gp10b_ltc_isr(struct gk20a *g) if ((mc_intr & 1 << ltc) == 0) continue; for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { - ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + - proj_ltc_stride_v() * ltc + - proj_lts_stride_v() * slice); + u32 offset = proj_ltc_stride_v() * ltc + + proj_lts_stride_v() * slice; + ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + offset); /* Detect and handle ECC errors */ if (ltc_intr & ltc_ltcs_ltss_intr_ecc_sec_error_pending_f()) { + u32 ecc_stats_reg_val; + gk20a_err(dev_from_gk20a(g), "Single bit error detected in GPU L2!"); + + ecc_stats_reg_val = + gk20a_readl(g, + ltc_ltc0_lts0_dstg_ecc_report_r() + offset); + g->gr.t18x.ecc_stats.l2_sec_count.counters[ltc] += + ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val); + ecc_stats_reg_val &= + ~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m()); + gk20a_writel(g, + ltc_ltc0_lts0_dstg_ecc_report_r() + offset, + ecc_stats_reg_val); + g->ops.mm.l2_flush(g, true); } if (ltc_intr 
& ltc_ltcs_ltss_intr_ecc_ded_error_pending_f()) { + u32 ecc_stats_reg_val; + gk20a_err(dev_from_gk20a(g), "Double bit error detected in GPU L2!"); + + ecc_stats_reg_val = + gk20a_readl(g, + ltc_ltc0_lts0_dstg_ecc_report_r() + offset); + g->gr.t18x.ecc_stats.l2_ded_count.counters[ltc] += + ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val); + ecc_stats_reg_val &= + ~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m()); + gk20a_writel(g, + ltc_ltc0_lts0_dstg_ecc_report_r() + offset, + ecc_stats_reg_val); } gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x", diff --git a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c index 8bddff3d..0cfb1d91 100644 --- a/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c +++ b/drivers/gpu/nvgpu/gp10b/platform_gp10b_tegra.c @@ -26,9 +26,14 @@ #include #include #include +#include #include "gk20a/platform_gk20a.h" #include "gk20a/gk20a.h" #include "platform_tegra.h" +#include "gr_gp10b.h" +#include "ltc_gp10b.h" +#include "hw_gr_gp10b.h" +#include "hw_ltc_gp10b.h" #define GP10B_MAX_SUPPORTED_FREQS 11 static unsigned long gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS]; @@ -40,6 +45,8 @@ static struct { {"gpu", 1000000000}, {"gpu_sys", 204000000} }; +static void gr_gp10b_remove_sysfs(struct device *dev); + /* * gp10b_tegra_get_clocks() * @@ -144,6 +151,8 @@ static int gp10b_tegra_remove(struct platform_device *pdev) /* remove gk20a power subdomain from host1x */ nvhost_unregister_client_domain(dev_to_genpd(&pdev->dev)); + gr_gp10b_remove_sysfs(&pdev->dev); + return 0; } @@ -345,3 +354,322 @@ struct gk20a_platform t18x_gpu_tegra_platform = { .force_reset_in_do_idle = true, }; + + +#define ECC_STAT_NAME_MAX_SIZE 100 + + +DEFINE_HASHTABLE(ecc_hash_table, 5); + +static struct device_attribute *dev_attr_sm_lrf_ecc_single_err_count_array; +static struct device_attribute *dev_attr_sm_lrf_ecc_double_err_count_array; + +static struct device_attribute *dev_attr_sm_shm_ecc_sec_count_array; +static struct 
device_attribute *dev_attr_sm_shm_ecc_sed_count_array;
+static struct device_attribute *dev_attr_sm_shm_ecc_ded_count_array;
+
+static struct device_attribute *dev_attr_tex_ecc_total_sec_pipe0_count_array;
+static struct device_attribute *dev_attr_tex_ecc_total_ded_pipe0_count_array;
+static struct device_attribute *dev_attr_tex_ecc_unique_sec_pipe0_count_array;
+static struct device_attribute *dev_attr_tex_ecc_unique_ded_pipe0_count_array;
+static struct device_attribute *dev_attr_tex_ecc_total_sec_pipe1_count_array;
+static struct device_attribute *dev_attr_tex_ecc_total_ded_pipe1_count_array;
+static struct device_attribute *dev_attr_tex_ecc_unique_sec_pipe1_count_array;
+static struct device_attribute *dev_attr_tex_ecc_unique_ded_pipe1_count_array;
+
+static struct device_attribute *dev_attr_l2_ecc_sec_count_array;
+static struct device_attribute *dev_attr_l2_ecc_ded_count_array;
+
+
+/*
+ * gen_ecc_hash_key()
+ *
+ * Returns the sum of the character values of the NUL-terminated string
+ * @str. The result is used as the key into ecc_hash_table, so different
+ * strings can map to the same key; lookups must still compare names.
+ */
+static u32 gen_ecc_hash_key(const char *str)
+{
+	u32 hash_key = 0;
+
+	for (; *str; str++)
+		hash_key += (u32)(*str);
+
+	return hash_key;
+}
+
+/*
+ * ecc_stat_split_name()
+ *
+ * Splits an attribute name of the form "<prefix><unit>_<base name>".
+ * On success the decimal unit index is stored in @hw_unit and a pointer to
+ * the base name (the text after the '_') is returned. Returns NULL when
+ * @full_name does not have that shape.
+ *
+ * The index is parsed digit by digit so that indices of 10 and above do not
+ * shift the start of the base name.
+ */
+static const char *ecc_stat_split_name(const char *full_name,
+					const char *prefix,
+					unsigned int *hw_unit)
+{
+	size_t prefix_len = strlen(prefix);
+	const char *pos;
+	unsigned int unit = 0;
+
+	if (strncmp(full_name, prefix, prefix_len))
+		return NULL;
+
+	pos = full_name + prefix_len;
+	if (*pos < '0' || *pos > '9')
+		return NULL;
+
+	while (*pos >= '0' && *pos <= '9') {
+		unit = unit * 10 + (unsigned int)(*pos - '0');
+		pos++;
+	}
+
+	if (*pos != '_')
+		return NULL;
+
+	*hw_unit = unit;
+	return pos + 1;
+}
+
+/*
+ * ecc_stat_show()
+ *
+ * sysfs show callback shared by every ECC counter attribute. The attribute
+ * name ("ltc<N>_<stat>" or "gpc0_tpc<N>_<stat>") is split into the unit
+ * index and the base stat name; the base name is hashed to find the
+ * matching struct ecc_stat, and the counter for that unit is printed.
+ *
+ * Returns the number of characters written to @buf. Lookup failures are
+ * reported as an error string in @buf rather than as a negative errno.
+ */
+static ssize_t ecc_stat_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	const char *ecc_stat_full_name = attr->attr.name;
+	const char *ecc_stat_base_name;
+	unsigned int hw_unit;
+	struct ecc_stat *ecc_stat;
+	u32 hash_key;
+
+	ecc_stat_base_name = ecc_stat_split_name(ecc_stat_full_name,
+						 "ltc", &hw_unit);
+	if (!ecc_stat_base_name)
+		ecc_stat_base_name = ecc_stat_split_name(ecc_stat_full_name,
+							 "gpc0_tpc", &hw_unit);
+	if (!ecc_stat_base_name)
+		return snprintf(buf,
+				PAGE_SIZE,
+				"Error: Invalid ECC stat name!\n");
+
+	/*
+	 * NOTE(review): struct ecc_stat does not record how many units it
+	 * holds. If an L2 stat and a TPC stat land in the same bucket and the
+	 * two unit counts differ, names[hw_unit] below can index past the
+	 * shorter array -- confirm ltc_count/tpc_count or store the count.
+	 */
+	hash_key = gen_ecc_hash_key(ecc_stat_base_name);
+	hash_for_each_possible(ecc_hash_table,
+				ecc_stat,
+				hash_node,
+				hash_key) {
+		if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
+			return snprintf(buf, PAGE_SIZE, "%u\n",
+					ecc_stat->counters[hw_unit]);
+	}
+
+	return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
+}
+
+/*
+ * ecc_stat_create()
+ *
+ * Allocates the per-unit counter, name and device_attribute arrays for one
+ * ECC stat, creates one read-only sysfs file per hardware unit and adds
+ * @ecc_stat to ecc_hash_table under the hash of @ecc_stat_name.
+ *
+ * @is_l2 selects the unit count (g->ltc_count when non-zero, otherwise
+ * g->gr.tpc_count) and the file name prefix ("ltc<N>_" or "gpc0_tpc<N>_").
+ *
+ * The attribute array is handed back through @dev_attr_array so that the
+ * caller's pointer really refers to it; ecc_stat_remove() needs that pointer
+ * to remove the files and free the array.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation failed (nothing is left
+ * allocated and *@dev_attr_array is NULL), or the OR of the
+ * device_create_file() results if a file could not be created.
+ */
+static int ecc_stat_create(struct platform_device *dev,
+				int is_l2,
+				char *ecc_stat_name,
+				struct ecc_stat *ecc_stat,
+				struct device_attribute **dev_attr_array)
+{
+	int error = 0;
+	struct gk20a *g = get_gk20a(dev);
+	struct device_attribute *attrs;
+	int num_hw_units = 0;
+	int hw_unit = 0;
+	u32 hash_key = 0;
+
+	if (is_l2)
+		num_hw_units = g->ltc_count;
+	else
+		num_hw_units = g->gr.tpc_count;
+
+	/* Allocate arrays */
+	attrs = kcalloc(num_hw_units, sizeof(*attrs), GFP_KERNEL);
+	ecc_stat->counters = kcalloc(num_hw_units,
+				     sizeof(*ecc_stat->counters), GFP_KERNEL);
+	ecc_stat->names = kcalloc(num_hw_units,
+				  sizeof(*ecc_stat->names), GFP_KERNEL);
+	if (!attrs || !ecc_stat->counters || !ecc_stat->names)
+		goto fail;
+
+	for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
+		ecc_stat->names[hw_unit] = kzalloc(ECC_STAT_NAME_MAX_SIZE,
+						   GFP_KERNEL);
+		if (!ecc_stat->names[hw_unit])
+			goto fail;
+	}
+
+	for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
+		/* Fill in struct device_attribute members */
+		if (is_l2)
+			snprintf(ecc_stat->names[hw_unit],
+				 ECC_STAT_NAME_MAX_SIZE,
+				 "ltc%d_%s",
+				 hw_unit,
+				 ecc_stat_name);
+		else
+			snprintf(ecc_stat->names[hw_unit],
+				 ECC_STAT_NAME_MAX_SIZE,
+				 "gpc0_tpc%d_%s",
+				 hw_unit,
+				 ecc_stat_name);
+
+		/* Heap-allocated attributes must be initialised for lockdep */
+		sysfs_attr_init(&attrs[hw_unit].attr);
+		attrs[hw_unit].attr.name = ecc_stat->names[hw_unit];
+		attrs[hw_unit].attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
+		attrs[hw_unit].show = ecc_stat_show;
+		attrs[hw_unit].store = NULL;
+
+		/* Create sysfs file */
+		error |= device_create_file(&dev->dev, &attrs[hw_unit]);
+	}
+
+	/* Add hash table entry */
+	hash_key = gen_ecc_hash_key(ecc_stat_name);
+	hash_add(ecc_hash_table,
+		 &ecc_stat->hash_node,
+		 hash_key);
+
+	*dev_attr_array = attrs;
+
+	return error;
+
+fail:
+	/* names[] entries are zero-initialised, so unset slots free as NULL */
+	if (ecc_stat->names) {
+		for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++)
+			kfree(ecc_stat->names[hw_unit]);
+	}
+	kfree(ecc_stat->names);
+	kfree(ecc_stat->counters);
+	kfree(attrs);
+	ecc_stat->names = NULL;
+	ecc_stat->counters = NULL;
+	*dev_attr_array = NULL;
+
+	return -ENOMEM;
+}
+
+/*
+ * ecc_stat_remove()
+ *
+ * Undoes ecc_stat_create(): removes the per-unit sysfs files, unlinks
+ * @ecc_stat from ecc_hash_table and frees the counter, name and attribute
+ * arrays. Does nothing when @dev_attr_array is NULL, i.e. when the stat was
+ * never created or its creation failed with -ENOMEM.
+ */
+static void ecc_stat_remove(struct device *dev,
+				int is_l2,
+				struct ecc_stat *ecc_stat,
+				struct device_attribute *dev_attr_array)
+{
+	struct platform_device *ndev = to_platform_device(dev);
+	struct gk20a *g = get_gk20a(ndev);
+	int num_hw_units = 0;
+	int hw_unit = 0;
+
+	if (!dev_attr_array)
+		return;
+
+	if (is_l2)
+		num_hw_units = g->ltc_count;
+	else
+		num_hw_units = g->gr.tpc_count;
+
+	/* Remove sysfs files */
+	for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++)
+		device_remove_file(dev, &dev_attr_array[hw_unit]);
+
+	/* Remove hash table entry */
+	hash_del(&ecc_stat->hash_node);
+
+	/* Free arrays */
+	kfree(ecc_stat->counters);
+	ecc_stat->counters = NULL;
+	if (ecc_stat->names) {
+		for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++)
+			kfree(ecc_stat->names[hw_unit]);
+	}
+	kfree(ecc_stat->names);
+	ecc_stat->names = NULL;
+	kfree(dev_attr_array);
+}
+
+/*
+ * gr_gp10b_create_sysfs()
+ *
+ * Creates the sysfs files for every ECC counter kept in
+ * g->gr.t18x.ecc_stats: SM LRF, SM SHM and TEX counters per TPC, and L2
+ * counters per LTC. Each file-scope dev_attr_*_array pointer is passed by
+ * address so it receives the attribute array that gr_gp10b_remove_sysfs()
+ * later tears down. Failures are logged once; nothing is returned.
+ */
+void gr_gp10b_create_sysfs(struct platform_device *dev)
+{
+	int error = 0;
+	struct gk20a *g = get_gk20a(dev);
+
+	error |= ecc_stat_create(dev,
+				0,
+				"sm_lrf_ecc_single_err_count",
+				&g->gr.t18x.ecc_stats.sm_lrf_single_err_count,
+				&dev_attr_sm_lrf_ecc_single_err_count_array);
+	error |= ecc_stat_create(dev,
+				0,
+				"sm_lrf_ecc_double_err_count",
+				&g->gr.t18x.ecc_stats.sm_lrf_double_err_count,
+				&dev_attr_sm_lrf_ecc_double_err_count_array);
+
+	error |= ecc_stat_create(dev,
+				0,
+				"sm_shm_ecc_sec_count",
+				&g->gr.t18x.ecc_stats.sm_shm_sec_count,
+				&dev_attr_sm_shm_ecc_sec_count_array);
+	error |= ecc_stat_create(dev,
+				0,
+				"sm_shm_ecc_sed_count",
+				&g->gr.t18x.ecc_stats.sm_shm_sed_count,
+				&dev_attr_sm_shm_ecc_sed_count_array);
+	error |= ecc_stat_create(dev,
+				0,
+				"sm_shm_ecc_ded_count",
+				&g->gr.t18x.ecc_stats.sm_shm_ded_count,
+				&dev_attr_sm_shm_ecc_ded_count_array);
+
+	error |= ecc_stat_create(dev,
+				0,
+				"tex_ecc_total_sec_pipe0_count",
+				&g->gr.t18x.ecc_stats.tex_total_sec_pipe0_count,
+				&dev_attr_tex_ecc_total_sec_pipe0_count_array);
+	error |= ecc_stat_create(dev,
+				0,
+				"tex_ecc_total_ded_pipe0_count",
+				&g->gr.t18x.ecc_stats.tex_total_ded_pipe0_count,
+				&dev_attr_tex_ecc_total_ded_pipe0_count_array);
+	error |= ecc_stat_create(dev,
+				0,
+				"tex_ecc_unique_sec_pipe0_count",
+				&g->gr.t18x.ecc_stats.tex_unique_sec_pipe0_count,
+				&dev_attr_tex_ecc_unique_sec_pipe0_count_array);
+	error |= ecc_stat_create(dev,
+				0,
+				"tex_ecc_unique_ded_pipe0_count",
+				&g->gr.t18x.ecc_stats.tex_unique_ded_pipe0_count,
+				&dev_attr_tex_ecc_unique_ded_pipe0_count_array);
+	error |= ecc_stat_create(dev,
+				0,
+				"tex_ecc_total_sec_pipe1_count",
+				&g->gr.t18x.ecc_stats.tex_total_sec_pipe1_count,
+				&dev_attr_tex_ecc_total_sec_pipe1_count_array);
+	error |= ecc_stat_create(dev,
+				0,
+				"tex_ecc_total_ded_pipe1_count",
+				&g->gr.t18x.ecc_stats.tex_total_ded_pipe1_count,
+				&dev_attr_tex_ecc_total_ded_pipe1_count_array);
+	error |= ecc_stat_create(dev,
+				0,
+				"tex_ecc_unique_sec_pipe1_count",
+				&g->gr.t18x.ecc_stats.tex_unique_sec_pipe1_count,
+				&dev_attr_tex_ecc_unique_sec_pipe1_count_array);
+	error |= ecc_stat_create(dev,
+				0,
+				"tex_ecc_unique_ded_pipe1_count",
+				&g->gr.t18x.ecc_stats.tex_unique_ded_pipe1_count,
+				&dev_attr_tex_ecc_unique_ded_pipe1_count_array);
+
+	error |= ecc_stat_create(dev,
+				1,
+				"lts0_ecc_sec_count",
+				&g->gr.t18x.ecc_stats.l2_sec_count,
+				&dev_attr_l2_ecc_sec_count_array);
+	error |= ecc_stat_create(dev,
+				1,
+				"lts0_ecc_ded_count",
+				&g->gr.t18x.ecc_stats.l2_ded_count,
+				&dev_attr_l2_ecc_ded_count_array);
+
+	if (error)
+		dev_err(&dev->dev, "Failed to create sysfs attributes!\n");
+}
+
+/*
+ * gr_gp10b_remove_sysfs()
+ *
+ * Removes every ECC counter sysfs file created by gr_gp10b_create_sysfs()
+ * and frees the associated arrays. Stats whose attribute array pointer is
+ * still NULL are skipped by ecc_stat_remove().
+ */
+static void gr_gp10b_remove_sysfs(struct device *dev)
+{
+	struct platform_device *ndev = to_platform_device(dev);
+	struct gk20a *g = get_gk20a(ndev);
+
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.sm_lrf_single_err_count,
+			dev_attr_sm_lrf_ecc_single_err_count_array);
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.sm_lrf_double_err_count,
+			dev_attr_sm_lrf_ecc_double_err_count_array);
+
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.sm_shm_sec_count,
+			dev_attr_sm_shm_ecc_sec_count_array);
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.sm_shm_sed_count,
+			dev_attr_sm_shm_ecc_sed_count_array);
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.sm_shm_ded_count,
+			dev_attr_sm_shm_ecc_ded_count_array);
+
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.tex_total_sec_pipe0_count,
+			dev_attr_tex_ecc_total_sec_pipe0_count_array);
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.tex_total_ded_pipe0_count,
+			dev_attr_tex_ecc_total_ded_pipe0_count_array);
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.tex_unique_sec_pipe0_count,
+			dev_attr_tex_ecc_unique_sec_pipe0_count_array);
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.tex_unique_ded_pipe0_count,
+			dev_attr_tex_ecc_unique_ded_pipe0_count_array);
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.tex_total_sec_pipe1_count,
+			dev_attr_tex_ecc_total_sec_pipe1_count_array);
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.tex_total_ded_pipe1_count,
+			dev_attr_tex_ecc_total_ded_pipe1_count_array);
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.tex_unique_sec_pipe1_count,
+			dev_attr_tex_ecc_unique_sec_pipe1_count_array);
+	ecc_stat_remove(dev,
+			0,
+			&g->gr.t18x.ecc_stats.tex_unique_ded_pipe1_count,
+			dev_attr_tex_ecc_unique_ded_pipe1_count_array);
+
+	ecc_stat_remove(dev,
+			1,
+			&g->gr.t18x.ecc_stats.l2_sec_count,
+			dev_attr_l2_ecc_sec_count_array);
+	ecc_stat_remove(dev,
+			1,
+			&g->gr.t18x.ecc_stats.l2_ded_count,
+			dev_attr_l2_ecc_ded_count_array);
+}
-- cgit v1.2.2