summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a
diff options
context:
space:
mode:
authorRichard Zhao <rizhao@nvidia.com>2018-06-26 20:37:40 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-07-19 19:43:58 -0400
commit7f14aafc2c02eb0fab458324d0ba91a7fdea3086 (patch)
treecda9f48839fbde3444fde521a9b0069eb06cd81a /drivers/gpu/nvgpu/gk20a
parent5ff1b3fe5a30c926e59a55ad25dd4daf430c8579 (diff)
gpu: nvgpu: rework ecc structure and sysfs
- Create common file common/ecc.c, which includes common functions for adding and removing ecc counters.
- Common code will create a list of all counters, which makes it easier to iterate over all counters.
- Add chip specific file for adding ecc counters.
- Add linux specific file os/linux/ecc_sysfs.c to export counters to sysfs.
- Remove obsolete code.
- The MISRA violation for using snprintf is not solved; it is tracked with jira NVGPU-859.

Jira NVGPUT-115

Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1763536
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--drivers/gpu/nvgpu/gk20a/ecc_gk20a.h102
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h8
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c8
3 files changed, 9 insertions, 109 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h b/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h
deleted file mode 100644
index 9c50a809..00000000
--- a/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h
+++ /dev/null
@@ -1,102 +0,0 @@
1/*
2 * GK20A ECC
3 *
4 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24#ifndef ECC_GK20A_H
25#define ECC_GK20A_H
26
27struct gk20a_ecc_stat {
28 char **names;
29 u32 *counters;
30 u32 count;
31#ifdef CONFIG_SYSFS
32 struct hlist_node hash_node;
33 struct device_attribute *attr_array;
34#endif
35};
36
37struct ecc_gk20a {
38 /* Stats per engine */
39 struct {
40 struct gk20a_ecc_stat sm_lrf_single_err_count;
41 struct gk20a_ecc_stat sm_lrf_double_err_count;
42
43 struct gk20a_ecc_stat sm_shm_sec_count;
44 struct gk20a_ecc_stat sm_shm_sed_count;
45 struct gk20a_ecc_stat sm_shm_ded_count;
46
47 struct gk20a_ecc_stat tex_total_sec_pipe0_count;
48 struct gk20a_ecc_stat tex_total_ded_pipe0_count;
49 struct gk20a_ecc_stat tex_unique_sec_pipe0_count;
50 struct gk20a_ecc_stat tex_unique_ded_pipe0_count;
51 struct gk20a_ecc_stat tex_total_sec_pipe1_count;
52 struct gk20a_ecc_stat tex_total_ded_pipe1_count;
53 struct gk20a_ecc_stat tex_unique_sec_pipe1_count;
54 struct gk20a_ecc_stat tex_unique_ded_pipe1_count;
55
56 struct gk20a_ecc_stat sm_l1_tag_corrected_err_count;
57 struct gk20a_ecc_stat sm_l1_tag_uncorrected_err_count;
58 struct gk20a_ecc_stat sm_cbu_corrected_err_count;
59 struct gk20a_ecc_stat sm_cbu_uncorrected_err_count;
60 struct gk20a_ecc_stat sm_l1_data_corrected_err_count;
61 struct gk20a_ecc_stat sm_l1_data_uncorrected_err_count;
62 struct gk20a_ecc_stat sm_icache_corrected_err_count;
63 struct gk20a_ecc_stat sm_icache_uncorrected_err_count;
64 struct gk20a_ecc_stat gcc_l15_corrected_err_count;
65 struct gk20a_ecc_stat gcc_l15_uncorrected_err_count;
66 struct gk20a_ecc_stat fecs_corrected_err_count;
67 struct gk20a_ecc_stat fecs_uncorrected_err_count;
68 struct gk20a_ecc_stat gpccs_corrected_err_count;
69 struct gk20a_ecc_stat gpccs_uncorrected_err_count;
70 struct gk20a_ecc_stat mmu_l1tlb_corrected_err_count;
71 struct gk20a_ecc_stat mmu_l1tlb_uncorrected_err_count;
72 } gr;
73
74 struct {
75 struct gk20a_ecc_stat l2_sec_count;
76 struct gk20a_ecc_stat l2_ded_count;
77 struct gk20a_ecc_stat l2_cache_corrected_err_count;
78 struct gk20a_ecc_stat l2_cache_uncorrected_err_count;
79 } ltc;
80
81 struct {
82 struct gk20a_ecc_stat mmu_l2tlb_corrected_err_count;
83 struct gk20a_ecc_stat mmu_l2tlb_uncorrected_err_count;
84 struct gk20a_ecc_stat mmu_hubtlb_corrected_err_count;
85 struct gk20a_ecc_stat mmu_hubtlb_uncorrected_err_count;
86 struct gk20a_ecc_stat mmu_fillunit_corrected_err_count;
87 struct gk20a_ecc_stat mmu_fillunit_uncorrected_err_count;
88 } fb;
89
90 struct {
91 struct gk20a_ecc_stat pmu_corrected_err_count;
92 struct gk20a_ecc_stat pmu_uncorrected_err_count;
93 } pmu;
94
95 struct {
96 struct gk20a_ecc_stat fbpa_sec_err_count;
97 struct gk20a_ecc_stat fbpa_ded_err_count;
98 } fbpa;
99
100};
101
102#endif /*__ECC_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 7cb8462f..e69036d7 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -35,7 +35,6 @@ struct gk20a_ctxsw_trace;
35struct acr_desc; 35struct acr_desc;
36struct nvgpu_mem_alloc_tracker; 36struct nvgpu_mem_alloc_tracker;
37struct dbg_profiler_object_data; 37struct dbg_profiler_object_data;
38struct ecc_gk20a;
39struct gk20a_debug_output; 38struct gk20a_debug_output;
40struct nvgpu_clk_pll_debug_data; 39struct nvgpu_clk_pll_debug_data;
41struct nvgpu_nvhost_dev; 40struct nvgpu_nvhost_dev;
@@ -64,6 +63,7 @@ struct nvgpu_ctxsw_trace_filter;
64#include <nvgpu/clk_arb.h> 63#include <nvgpu/clk_arb.h>
65#include <nvgpu/nvlink.h> 64#include <nvgpu/nvlink.h>
66#include <nvgpu/sim.h> 65#include <nvgpu/sim.h>
66#include <nvgpu/ecc.h>
67 67
68#include "clk_gk20a.h" 68#include "clk_gk20a.h"
69#include "ce2_gk20a.h" 69#include "ce2_gk20a.h"
@@ -77,7 +77,6 @@ struct nvgpu_ctxsw_trace_filter;
77#include "perf/perf.h" 77#include "perf/perf.h"
78#include "pmgr/pmgr.h" 78#include "pmgr/pmgr.h"
79#include "therm/thrm.h" 79#include "therm/thrm.h"
80#include "ecc_gk20a.h"
81 80
82/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. 81/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds.
83 32 ns is the resolution of ptimer. */ 82 32 ns is the resolution of ptimer. */
@@ -384,8 +383,7 @@ struct gpu_ops {
384 u32 gpc_exception); 383 u32 gpc_exception);
385 void (*enable_gpc_exceptions)(struct gk20a *g); 384 void (*enable_gpc_exceptions)(struct gk20a *g);
386 void (*enable_exceptions)(struct gk20a *g); 385 void (*enable_exceptions)(struct gk20a *g);
387 void (*create_gr_sysfs)(struct gk20a *g); 386 int (*init_ecc)(struct gk20a *g);
388 void (*remove_gr_sysfs)(struct gk20a *g);
389 u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g); 387 u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g);
390 int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc, 388 int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc,
391 u32 sm, struct channel_gk20a *fault_ch); 389 u32 sm, struct channel_gk20a *fault_ch);
@@ -1385,7 +1383,7 @@ struct gk20a {
1385 struct mm_gk20a mm; 1383 struct mm_gk20a mm;
1386 struct nvgpu_pmu pmu; 1384 struct nvgpu_pmu pmu;
1387 struct acr_desc acr; 1385 struct acr_desc acr;
1388 struct ecc_gk20a ecc; 1386 struct nvgpu_ecc ecc;
1389 struct clk_pmupstate clk_pmu; 1387 struct clk_pmupstate clk_pmu;
1390 struct perf_pmupstate perf_pmu; 1388 struct perf_pmupstate perf_pmu;
1391 struct pmgr_pmupstate pmgr_pmu; 1389 struct pmgr_pmupstate pmgr_pmu;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index c70c1cd4..38570041 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -38,6 +38,7 @@
38#include <nvgpu/mm.h> 38#include <nvgpu/mm.h>
39#include <nvgpu/ctxsw_trace.h> 39#include <nvgpu/ctxsw_trace.h>
40#include <nvgpu/error_notifier.h> 40#include <nvgpu/error_notifier.h>
41#include <nvgpu/ecc.h>
41 42
42#include "gk20a.h" 43#include "gk20a.h"
43#include "gr_gk20a.h" 44#include "gr_gk20a.h"
@@ -3127,6 +3128,8 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3127 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; 3128 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
3128 3129
3129 gk20a_comptag_allocator_destroy(g, &gr->comp_tags); 3130 gk20a_comptag_allocator_destroy(g, &gr->comp_tags);
3131
3132 nvgpu_ecc_remove_support(g);
3130} 3133}
3131 3134
3132static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) 3135static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
@@ -4872,8 +4875,9 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g)
4872 gr->remove_support = gk20a_remove_gr_support; 4875 gr->remove_support = gk20a_remove_gr_support;
4873 gr->sw_ready = true; 4876 gr->sw_ready = true;
4874 4877
4875 if (g->ops.gr.create_gr_sysfs) 4878 err = nvgpu_ecc_init_support(g);
4876 g->ops.gr.create_gr_sysfs(g); 4879 if (err)
4880 goto clean_up;
4877 4881
4878 nvgpu_log_fn(g, "done"); 4882 nvgpu_log_fn(g, "done");
4879 return 0; 4883 return 0;