summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
diff options
context:
space:
mode:
authorAdeel Raza <araza@nvidia.com>2015-06-18 19:31:50 -0400
committerDeepak Nibade <dnibade@nvidia.com>2016-12-27 04:52:10 -0500
commitf17e0d822b47465cca23afa2054bfa1267b52b95 (patch)
treeb0e2c8953d1e2296c9d1a3b7207ff8546e6d0249 /drivers/gpu/nvgpu/gp10b/gr_gp10b.c
parent4c5bc9c93b86d9de022d6baff343217f1d047a62 (diff)
gpu: nvgpu: gp10b: add ECC support
Add ECC exception handling support for SM, TEX, and LTC.

Bug 1635727
Bug 1637486

Change-Id: I8862ead5784f48742355432ec07c71a82b1b6735
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/935362
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/gr_gp10b.c')
-rw-r--r--drivers/gpu/nvgpu/gp10b/gr_gp10b.c102
1 files changed, 102 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 6bdb9a7c..f8c31bd3 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -55,6 +55,106 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
55 return valid; 55 return valid;
56} 56}
57 57
58static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
59 bool *post_event, struct channel_gk20a *fault_ch)
60{
61 int ret = 0;
62 u32 offset = proj_gpc_stride_v() * gpc +
63 proj_tpc_in_gpc_stride_v() * tpc;
64 u32 lrf_ecc_status, shm_ecc_status;
65
66 gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, NULL);
67
68 /* Check for LRF ECC errors. */
69 lrf_ecc_status = gk20a_readl(g,
70 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset);
71 if ( (lrf_ecc_status &
72 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f()) ||
73 (lrf_ecc_status &
74 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f()) ||
75 (lrf_ecc_status &
76 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f()) ||
77 (lrf_ecc_status &
78 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f()) ) {
79
80 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
81 "Single bit error detected in SM LRF!");
82 }
83 if ( (lrf_ecc_status &
84 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) ||
85 (lrf_ecc_status &
86 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f()) ||
87 (lrf_ecc_status &
88 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f()) ||
89 (lrf_ecc_status &
90 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f()) ) {
91
92 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
93 "Double bit error detected in SM LRF!");
94 }
95 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
96 lrf_ecc_status);
97
98 /* Check for SHM ECC errors. */
99 shm_ecc_status = gk20a_readl(g,
100 gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset);
101 if ((shm_ecc_status &
102 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f()) ||
103 (shm_ecc_status &
104 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f()) ||
105 (shm_ecc_status &
106 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) ||
107 (shm_ecc_status &
108 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) ) {
109
110 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
111 "Single bit error detected in SM SHM!");
112 }
113 if ( (shm_ecc_status &
114 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) ||
115 (shm_ecc_status &
116 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) ) {
117
118 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
119 "Double bit error detected in SM SHM!");
120 }
121 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset,
122 shm_ecc_status);
123
124
125 return ret;
126}
127
128static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
129 bool *post_event)
130{
131 int ret = 0;
132 u32 offset = proj_gpc_stride_v() * gpc +
133 proj_tpc_in_gpc_stride_v() * tpc;
134 u32 esr;
135
136 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
137
138 esr = gk20a_readl(g,
139 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset);
140 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "0x%08x", esr);
141
142 if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f()) {
143 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
144 "Single bit error detected in TEX!");
145 }
146 if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f()) {
147 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
148 "Double bit error detected in TEX!");
149 }
150
151 gk20a_writel(g,
152 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset,
153 esr);
154
155 return ret;
156}
157
58static int gr_gp10b_commit_global_cb_manager(struct gk20a *g, 158static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
59 struct channel_gk20a *c, bool patch) 159 struct channel_gk20a *c, bool patch)
60{ 160{
@@ -1154,4 +1254,6 @@ void gp10b_init_gr(struct gpu_ops *gops)
1154 gops->gr.init_cyclestats = gr_gp10b_init_cyclestats; 1254 gops->gr.init_cyclestats = gr_gp10b_init_cyclestats;
1155 gops->gr.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask; 1255 gops->gr.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask;
1156 gops->gr.get_access_map = gr_gp10b_get_access_map; 1256 gops->gr.get_access_map = gr_gp10b_get_access_map;
1257 gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
1258 gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
1157} 1259}