diff options
author | Richard Zhao <rizhao@nvidia.com> | 2018-06-26 20:37:40 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-07-19 19:43:58 -0400 |
commit | 7f14aafc2c02eb0fab458324d0ba91a7fdea3086 (patch) | |
tree | cda9f48839fbde3444fde521a9b0069eb06cd81a /drivers/gpu/nvgpu/gk20a | |
parent | 5ff1b3fe5a30c926e59a55ad25dd4daf430c8579 (diff) |
gpu: nvgpu: rework ecc structure and sysfs
- create common file common/ecc.c, which includes common functions for
adding and removing ecc counters.
- common code will create a list of all counters, which makes it easier to
iterate over all counters.
- add chip-specific file for adding ecc counters.
- add Linux-specific file os/linux/ecc_sysfs.c to export counters to
sysfs.
- remove obsolete code
- the MISRA violation for using snprintf is not solved; it is tracked in
jira NVGPU-859
Jira NVGPUT-115
Change-Id: I1905c43c5c9b2b131199807533dee8e63ddc12f4
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1763536
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ecc_gk20a.h | 102 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 8 |
3 files changed, 9 insertions, 109 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h b/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h deleted file mode 100644 index 9c50a809..00000000 --- a/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h +++ /dev/null | |||
@@ -1,102 +0,0 @@ | |||
1 | /* | ||
2 | * GK20A ECC | ||
3 | * | ||
4 | * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | #ifndef ECC_GK20A_H | ||
25 | #define ECC_GK20A_H | ||
26 | |||
27 | struct gk20a_ecc_stat { | ||
28 | char **names; | ||
29 | u32 *counters; | ||
30 | u32 count; | ||
31 | #ifdef CONFIG_SYSFS | ||
32 | struct hlist_node hash_node; | ||
33 | struct device_attribute *attr_array; | ||
34 | #endif | ||
35 | }; | ||
36 | |||
37 | struct ecc_gk20a { | ||
38 | /* Stats per engine */ | ||
39 | struct { | ||
40 | struct gk20a_ecc_stat sm_lrf_single_err_count; | ||
41 | struct gk20a_ecc_stat sm_lrf_double_err_count; | ||
42 | |||
43 | struct gk20a_ecc_stat sm_shm_sec_count; | ||
44 | struct gk20a_ecc_stat sm_shm_sed_count; | ||
45 | struct gk20a_ecc_stat sm_shm_ded_count; | ||
46 | |||
47 | struct gk20a_ecc_stat tex_total_sec_pipe0_count; | ||
48 | struct gk20a_ecc_stat tex_total_ded_pipe0_count; | ||
49 | struct gk20a_ecc_stat tex_unique_sec_pipe0_count; | ||
50 | struct gk20a_ecc_stat tex_unique_ded_pipe0_count; | ||
51 | struct gk20a_ecc_stat tex_total_sec_pipe1_count; | ||
52 | struct gk20a_ecc_stat tex_total_ded_pipe1_count; | ||
53 | struct gk20a_ecc_stat tex_unique_sec_pipe1_count; | ||
54 | struct gk20a_ecc_stat tex_unique_ded_pipe1_count; | ||
55 | |||
56 | struct gk20a_ecc_stat sm_l1_tag_corrected_err_count; | ||
57 | struct gk20a_ecc_stat sm_l1_tag_uncorrected_err_count; | ||
58 | struct gk20a_ecc_stat sm_cbu_corrected_err_count; | ||
59 | struct gk20a_ecc_stat sm_cbu_uncorrected_err_count; | ||
60 | struct gk20a_ecc_stat sm_l1_data_corrected_err_count; | ||
61 | struct gk20a_ecc_stat sm_l1_data_uncorrected_err_count; | ||
62 | struct gk20a_ecc_stat sm_icache_corrected_err_count; | ||
63 | struct gk20a_ecc_stat sm_icache_uncorrected_err_count; | ||
64 | struct gk20a_ecc_stat gcc_l15_corrected_err_count; | ||
65 | struct gk20a_ecc_stat gcc_l15_uncorrected_err_count; | ||
66 | struct gk20a_ecc_stat fecs_corrected_err_count; | ||
67 | struct gk20a_ecc_stat fecs_uncorrected_err_count; | ||
68 | struct gk20a_ecc_stat gpccs_corrected_err_count; | ||
69 | struct gk20a_ecc_stat gpccs_uncorrected_err_count; | ||
70 | struct gk20a_ecc_stat mmu_l1tlb_corrected_err_count; | ||
71 | struct gk20a_ecc_stat mmu_l1tlb_uncorrected_err_count; | ||
72 | } gr; | ||
73 | |||
74 | struct { | ||
75 | struct gk20a_ecc_stat l2_sec_count; | ||
76 | struct gk20a_ecc_stat l2_ded_count; | ||
77 | struct gk20a_ecc_stat l2_cache_corrected_err_count; | ||
78 | struct gk20a_ecc_stat l2_cache_uncorrected_err_count; | ||
79 | } ltc; | ||
80 | |||
81 | struct { | ||
82 | struct gk20a_ecc_stat mmu_l2tlb_corrected_err_count; | ||
83 | struct gk20a_ecc_stat mmu_l2tlb_uncorrected_err_count; | ||
84 | struct gk20a_ecc_stat mmu_hubtlb_corrected_err_count; | ||
85 | struct gk20a_ecc_stat mmu_hubtlb_uncorrected_err_count; | ||
86 | struct gk20a_ecc_stat mmu_fillunit_corrected_err_count; | ||
87 | struct gk20a_ecc_stat mmu_fillunit_uncorrected_err_count; | ||
88 | } fb; | ||
89 | |||
90 | struct { | ||
91 | struct gk20a_ecc_stat pmu_corrected_err_count; | ||
92 | struct gk20a_ecc_stat pmu_uncorrected_err_count; | ||
93 | } pmu; | ||
94 | |||
95 | struct { | ||
96 | struct gk20a_ecc_stat fbpa_sec_err_count; | ||
97 | struct gk20a_ecc_stat fbpa_ded_err_count; | ||
98 | } fbpa; | ||
99 | |||
100 | }; | ||
101 | |||
102 | #endif /*__ECC_GK20A_H__*/ | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 7cb8462f..e69036d7 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -35,7 +35,6 @@ struct gk20a_ctxsw_trace; | |||
35 | struct acr_desc; | 35 | struct acr_desc; |
36 | struct nvgpu_mem_alloc_tracker; | 36 | struct nvgpu_mem_alloc_tracker; |
37 | struct dbg_profiler_object_data; | 37 | struct dbg_profiler_object_data; |
38 | struct ecc_gk20a; | ||
39 | struct gk20a_debug_output; | 38 | struct gk20a_debug_output; |
40 | struct nvgpu_clk_pll_debug_data; | 39 | struct nvgpu_clk_pll_debug_data; |
41 | struct nvgpu_nvhost_dev; | 40 | struct nvgpu_nvhost_dev; |
@@ -64,6 +63,7 @@ struct nvgpu_ctxsw_trace_filter; | |||
64 | #include <nvgpu/clk_arb.h> | 63 | #include <nvgpu/clk_arb.h> |
65 | #include <nvgpu/nvlink.h> | 64 | #include <nvgpu/nvlink.h> |
66 | #include <nvgpu/sim.h> | 65 | #include <nvgpu/sim.h> |
66 | #include <nvgpu/ecc.h> | ||
67 | 67 | ||
68 | #include "clk_gk20a.h" | 68 | #include "clk_gk20a.h" |
69 | #include "ce2_gk20a.h" | 69 | #include "ce2_gk20a.h" |
@@ -77,7 +77,6 @@ struct nvgpu_ctxsw_trace_filter; | |||
77 | #include "perf/perf.h" | 77 | #include "perf/perf.h" |
78 | #include "pmgr/pmgr.h" | 78 | #include "pmgr/pmgr.h" |
79 | #include "therm/thrm.h" | 79 | #include "therm/thrm.h" |
80 | #include "ecc_gk20a.h" | ||
81 | 80 | ||
82 | /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. | 81 | /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. |
83 | 32 ns is the resolution of ptimer. */ | 82 | 32 ns is the resolution of ptimer. */ |
@@ -384,8 +383,7 @@ struct gpu_ops { | |||
384 | u32 gpc_exception); | 383 | u32 gpc_exception); |
385 | void (*enable_gpc_exceptions)(struct gk20a *g); | 384 | void (*enable_gpc_exceptions)(struct gk20a *g); |
386 | void (*enable_exceptions)(struct gk20a *g); | 385 | void (*enable_exceptions)(struct gk20a *g); |
387 | void (*create_gr_sysfs)(struct gk20a *g); | 386 | int (*init_ecc)(struct gk20a *g); |
388 | void (*remove_gr_sysfs)(struct gk20a *g); | ||
389 | u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g); | 387 | u32 (*get_lrf_tex_ltc_dram_override)(struct gk20a *g); |
390 | int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc, | 388 | int (*record_sm_error_state)(struct gk20a *g, u32 gpc, u32 tpc, |
391 | u32 sm, struct channel_gk20a *fault_ch); | 389 | u32 sm, struct channel_gk20a *fault_ch); |
@@ -1385,7 +1383,7 @@ struct gk20a { | |||
1385 | struct mm_gk20a mm; | 1383 | struct mm_gk20a mm; |
1386 | struct nvgpu_pmu pmu; | 1384 | struct nvgpu_pmu pmu; |
1387 | struct acr_desc acr; | 1385 | struct acr_desc acr; |
1388 | struct ecc_gk20a ecc; | 1386 | struct nvgpu_ecc ecc; |
1389 | struct clk_pmupstate clk_pmu; | 1387 | struct clk_pmupstate clk_pmu; |
1390 | struct perf_pmupstate perf_pmu; | 1388 | struct perf_pmupstate perf_pmu; |
1391 | struct pmgr_pmupstate pmgr_pmu; | 1389 | struct pmgr_pmupstate pmgr_pmu; |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index c70c1cd4..38570041 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <nvgpu/mm.h> | 38 | #include <nvgpu/mm.h> |
39 | #include <nvgpu/ctxsw_trace.h> | 39 | #include <nvgpu/ctxsw_trace.h> |
40 | #include <nvgpu/error_notifier.h> | 40 | #include <nvgpu/error_notifier.h> |
41 | #include <nvgpu/ecc.h> | ||
41 | 42 | ||
42 | #include "gk20a.h" | 43 | #include "gk20a.h" |
43 | #include "gr_gk20a.h" | 44 | #include "gr_gk20a.h" |
@@ -3127,6 +3128,8 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) | |||
3127 | gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; | 3128 | gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; |
3128 | 3129 | ||
3129 | gk20a_comptag_allocator_destroy(g, &gr->comp_tags); | 3130 | gk20a_comptag_allocator_destroy(g, &gr->comp_tags); |
3131 | |||
3132 | nvgpu_ecc_remove_support(g); | ||
3130 | } | 3133 | } |
3131 | 3134 | ||
3132 | static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | 3135 | static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) |
@@ -4872,8 +4875,9 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) | |||
4872 | gr->remove_support = gk20a_remove_gr_support; | 4875 | gr->remove_support = gk20a_remove_gr_support; |
4873 | gr->sw_ready = true; | 4876 | gr->sw_ready = true; |
4874 | 4877 | ||
4875 | if (g->ops.gr.create_gr_sysfs) | 4878 | err = nvgpu_ecc_init_support(g); |
4876 | g->ops.gr.create_gr_sysfs(g); | 4879 | if (err) |
4880 | goto clean_up; | ||
4877 | 4881 | ||
4878 | nvgpu_log_fn(g, "done"); | 4882 | nvgpu_log_fn(g, "done"); |
4879 | return 0; | 4883 | return 0; |