summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gp10b
diff options
context:
space:
mode:
author: Adeel Raza <araza@nvidia.com>, 2015-06-18 19:31:50 -0400
committer: Deepak Nibade <dnibade@nvidia.com>, 2016-12-27 04:52:10 -0500
commit: f17e0d822b47465cca23afa2054bfa1267b52b95 (patch)
tree: b0e2c8953d1e2296c9d1a3b7207ff8546e6d0249 /drivers/gpu/nvgpu/gp10b
parent: 4c5bc9c93b86d9de022d6baff343217f1d047a62 (diff)
gpu: nvgpu: gp10b: add ECC support
Add ECC exception handling support for SM, TEX, and LTC.

Bug 1635727
Bug 1637486

Change-Id: I8862ead5784f48742355432ec07c71a82b1b6735
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: http://git-master/r/935362
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b')
-rw-r--r-- drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 102
-rw-r--r-- drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 110
-rw-r--r-- drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h | 16
-rw-r--r-- drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 23
4 files changed, 250 insertions, 1 deletion
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 6bdb9a7c..f8c31bd3 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -55,6 +55,106 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
55 return valid; 55 return valid;
56} 56}
57 57
58static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
59 bool *post_event, struct channel_gk20a *fault_ch)
60{
61 int ret = 0;
62 u32 offset = proj_gpc_stride_v() * gpc +
63 proj_tpc_in_gpc_stride_v() * tpc;
64 u32 lrf_ecc_status, shm_ecc_status;
65
66 gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, NULL);
67
68 /* Check for LRF ECC errors. */
69 lrf_ecc_status = gk20a_readl(g,
70 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset);
71 if ( (lrf_ecc_status &
72 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f()) ||
73 (lrf_ecc_status &
74 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f()) ||
75 (lrf_ecc_status &
76 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f()) ||
77 (lrf_ecc_status &
78 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f()) ) {
79
80 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
81 "Single bit error detected in SM LRF!");
82 }
83 if ( (lrf_ecc_status &
84 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f()) ||
85 (lrf_ecc_status &
86 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f()) ||
87 (lrf_ecc_status &
88 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f()) ||
89 (lrf_ecc_status &
90 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f()) ) {
91
92 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
93 "Double bit error detected in SM LRF!");
94 }
95 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset,
96 lrf_ecc_status);
97
98 /* Check for SHM ECC errors. */
99 shm_ecc_status = gk20a_readl(g,
100 gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset);
101 if ((shm_ecc_status &
102 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f()) ||
103 (shm_ecc_status &
104 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f()) ||
105 (shm_ecc_status &
106 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f()) ||
107 (shm_ecc_status &
108 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f()) ) {
109
110 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
111 "Single bit error detected in SM SHM!");
112 }
113 if ( (shm_ecc_status &
114 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) ||
115 (shm_ecc_status &
116 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f()) ) {
117
118 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
119 "Double bit error detected in SM SHM!");
120 }
121 gk20a_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset,
122 shm_ecc_status);
123
124
125 return ret;
126}
127
128static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
129 bool *post_event)
130{
131 int ret = 0;
132 u32 offset = proj_gpc_stride_v() * gpc +
133 proj_tpc_in_gpc_stride_v() * tpc;
134 u32 esr;
135
136 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
137
138 esr = gk20a_readl(g,
139 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset);
140 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "0x%08x", esr);
141
142 if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f()) {
143 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
144 "Single bit error detected in TEX!");
145 }
146 if (esr & gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f()) {
147 gk20a_dbg(gpu_dbg_fn | gpu_dbg_intr,
148 "Double bit error detected in TEX!");
149 }
150
151 gk20a_writel(g,
152 gr_gpc0_tpc0_tex_m_hww_esr_r() + offset,
153 esr);
154
155 return ret;
156}
157
58static int gr_gp10b_commit_global_cb_manager(struct gk20a *g, 158static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
59 struct channel_gk20a *c, bool patch) 159 struct channel_gk20a *c, bool patch)
60{ 160{
@@ -1154,4 +1254,6 @@ void gp10b_init_gr(struct gpu_ops *gops)
1154 gops->gr.init_cyclestats = gr_gp10b_init_cyclestats; 1254 gops->gr.init_cyclestats = gr_gp10b_init_cyclestats;
1155 gops->gr.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask; 1255 gops->gr.set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask;
1156 gops->gr.get_access_map = gr_gp10b_get_access_map; 1256 gops->gr.get_access_map = gr_gp10b_get_access_map;
1257 gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
1258 gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
1157} 1259}
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
index 347e530d..9569bb9c 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -466,6 +466,70 @@ static inline u32 gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r(void)
466{ 466{
467 return 0x00504238; 467 return 0x00504238;
468} 468}
/*
 * SM LRF ECC status register (GPC0/TPC0 instance) and its pending-bit masks.
 * gr_gp10b_handle_sm_exception() adds a per-GPC/TPC stride offset to the
 * _r() value and tests the value read against these masks.
 * Single bit error masks occupy bits 4-7 and double bit error masks
 * bits 8-11, one bit per QRFDP unit (0-3).
 */
469static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r(void)
470{
471 return 0x005046b8;
472}
473static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f(void)
474{
475 return 0x10;
476}
477static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp1_pending_f(void)
478{
479 return 0x20;
480}
481static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp2_pending_f(void)
482{
483 return 0x40;
484}
485static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp3_pending_f(void)
486{
487 return 0x80;
488}
489static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp0_pending_f(void)
490{
491 return 0x100;
492}
493static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp1_pending_f(void)
494{
495 return 0x200;
496}
497static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f(void)
498{
499 return 0x400;
500}
501static inline u32 gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f(void)
502{
503 return 0x800;
504}
/*
 * SM SHM ECC status register (GPC0/TPC0 instance) and its pending-bit masks:
 * single error corrected in bits 0-1, single error detected in bits 4-5,
 * double error detected in bits 8-9, one bit each for shm0 and shm1.
 */
505static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_r(void)
506{
507 return 0x005044a0;
508}
509static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f(void)
510{
511 return 0x1;
512}
513static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm1_pending_f(void)
514{
515 return 0x2;
516}
517static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm0_pending_f(void)
518{
519 return 0x10;
520}
521static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_detected_shm1_pending_f(void)
522{
523 return 0x20;
524}
525static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f(void)
526{
527 return 0x100;
528}
529static inline u32 gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm1_pending_f(void)
530{
531 return 0x200;
532}
469static inline u32 gr_pri_be0_crop_status1_r(void) 533static inline u32 gr_pri_be0_crop_status1_r(void)
470{ 534{
471 return 0x00410134; 535 return 0x00410134;
@@ -3158,6 +3222,14 @@ static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f(vo
3158{ 3222{
3159 return 0x10; 3223 return 0x10;
3160} 3224}
/*
 * Bit masks for the ECC SEC (bit 29) and ECC DED (bit 30) error report
 * fields of the SM HWW global ESR report mask.
 * NOTE(review): SEC/DED presumably mean single-error-correct and
 * double-error-detect; neither accessor is referenced in the visible hunks.
 */
3225static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_ecc_sec_error_report_f(void)
3226{
3227 return 0x20000000;
3228}
3229static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_ecc_ded_error_report_f(void)
3230{
3231 return 0x40000000;
3232}
3161static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f(void) 3233static inline u32 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f(void)
3162{ 3234{
3163 return 0x20; 3235 return 0x20;
@@ -3174,6 +3246,10 @@ static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f(void)
3174{ 3246{
3175 return 0x2; 3247 return 0x2;
3176} 3248}
/*
 * Bit mask (bit 0) for the "tex enabled" field of the TPC exception enable
 * register; the neighbouring "sm enabled" field is bit 1.
 */
3249static inline u32 gr_gpcs_tpcs_tpccs_tpc_exception_en_tex_enabled_f(void)
3250{
3251 return 0x1;
3252}
3177static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void) 3253static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
3178{ 3254{
3179 return 0x0050450c; 3255 return 0x0050450c;
@@ -3210,6 +3286,14 @@ static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_r(void)
3210{ 3286{
3211 return 0x00504508; 3287 return 0x00504508;
3212} 3288}
/*
 * TEX field of the TPC exception register: _tex_v() extracts bit 0 from a
 * register value r, and _tex_pending_v() is the field value (1) to compare
 * the extracted result against.
 */
3289static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_tex_v(u32 r)
3290{
3291 return (r >> 0) & 0x1;
3292}
3293static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_tex_pending_v(void)
3294{
3295 return 0x00000001;
3296}
3213static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(u32 r) 3297static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_sm_v(u32 r)
3214{ 3298{
3215 return (r >> 1) & 0x1; 3299 return (r >> 1) & 0x1;
@@ -3322,6 +3406,14 @@ static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f(void)
3322{ 3406{
3323 return 0x10; 3407 return 0x10;
3324} 3408}
/*
 * Bit masks for the ECC SEC (bit 29) and ECC DED (bit 30) error pending
 * fields of the SM HWW global ESR; the values match the corresponding
 * report-mask fields.
 */
3409static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_ecc_sec_error_pending_f(void)
3410{
3411 return 0x20000000;
3412}
3413static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_ecc_ded_error_pending_f(void)
3414{
3415 return 0x40000000;
3416}
3325static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f(void) 3417static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f(void)
3326{ 3418{
3327 return 0x20; 3419 return 0x20;
@@ -3330,6 +3422,22 @@ static inline u32 gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(
3330{ 3422{
3331 return 0x40; 3423 return 0x40;
3332} 3424}
/*
 * TEX HWW ESR register (GPC0/TPC0 instance) and its pending-bit masks:
 * intr (bit 0), ECC SEC (bit 7) and ECC DED (bit 8).
 * gr_gp10b_handle_tex_exception() adds a per-GPC/TPC stride offset to the
 * _r() value and tests the SEC and DED masks against the value read.
 */
3425static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_r(void)
3426{
3427 return 0x00504224;
3428}
3429static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_intr_pending_f(void)
3430{
3431 return 0x1;
3432}
3433static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_ecc_sec_pending_f(void)
3434{
3435 return 0x80;
3436}
3437static inline u32 gr_gpc0_tpc0_tex_m_hww_esr_ecc_ded_pending_f(void)
3438{
3439 return 0x100;
3440}
3333static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void) 3441static inline u32 gr_gpc0_tpc0_sm_hww_warp_esr_r(void)
3334{ 3442{
3335 return 0x00504648; 3443 return 0x00504648;
diff --git a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
index ea96a9aa..302c2243 100644
--- a/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/hw_ltc_gp10b.h
@@ -286,6 +286,14 @@ static inline u32 ltc_ltcs_ltss_intr_r(void)
286{ 286{
287 return 0x0017e20c; 287 return 0x0017e20c;
288} 288}
/*
 * Bit masks for the ECC SEC (bit 8) and ECC DED (bit 9) error pending
 * fields of the LTC interrupt register. gp10b_ltc_isr() tests them against
 * the value read from each per-LTC, per-slice interrupt register.
 */
289static inline u32 ltc_ltcs_ltss_intr_ecc_sec_error_pending_f(void)
290{
291 return 0x100;
292}
293static inline u32 ltc_ltcs_ltss_intr_ecc_ded_error_pending_f(void)
294{
295 return 0x200;
296}
289static inline u32 ltc_ltcs_ltss_intr_en_evicted_cb_m(void) 297static inline u32 ltc_ltcs_ltss_intr_en_evicted_cb_m(void)
290{ 298{
291 return 0x1 << 20; 299 return 0x1 << 20;
@@ -294,6 +302,14 @@ static inline u32 ltc_ltcs_ltss_intr_en_illegal_compstat_access_m(void)
294{ 302{
295 return 0x1 << 30; 303 return 0x1 << 30;
296} 304}
/*
 * Bit masks for the ECC SEC (bit 24) and ECC DED (bit 25) interrupt enable
 * fields of the LTC interrupt register. gp10b_ltc_init_fs_state() ORs both
 * into the value of ltc_ltcs_ltss_intr_r() to enable the ECC interrupts.
 */
305static inline u32 ltc_ltcs_ltss_intr_en_ecc_sec_error_enabled_f(void)
306{
307 return 0x1000000;
308}
309static inline u32 ltc_ltcs_ltss_intr_en_ecc_ded_error_enabled_f(void)
310{
311 return 0x2000000;
312}
297static inline u32 ltc_ltc0_lts0_intr_r(void) 313static inline u32 ltc_ltc0_lts0_intr_r(void)
298{ 314{
299 return 0x0014040c; 315 return 0x0014040c;
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index 47992988..d0be86a4 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -136,6 +136,20 @@ static void gp10b_ltc_isr(struct gk20a *g)
136 ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + 136 ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
137 proj_ltc_stride_v() * ltc + 137 proj_ltc_stride_v() * ltc +
138 proj_lts_stride_v() * slice); 138 proj_lts_stride_v() * slice);
139
140 /* Detect and handle ECC errors */
141 if (ltc_intr &
142 ltc_ltcs_ltss_intr_ecc_sec_error_pending_f()) {
143 gk20a_err(dev_from_gk20a(g),
144 "Single bit error detected in GPU L2!");
145 g->ops.mm.l2_flush(g, true);
146 }
147 if (ltc_intr &
148 ltc_ltcs_ltss_intr_ecc_ded_error_pending_f()) {
149 gk20a_err(dev_from_gk20a(g),
150 "Double bit error detected in GPU L2!");
151 }
152
139 gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x", 153 gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
140 ltc, slice, ltc_intr); 154 ltc, slice, ltc_intr);
141 gk20a_writel(g, ltc_ltc0_lts0_intr_r() + 155 gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
@@ -148,10 +162,19 @@ static void gp10b_ltc_isr(struct gk20a *g)
148 162
/*
 * gp10b LTC floorsweeping-state init: runs the gm20b init, programs the AXI
 * user stream ID, then enables the ECC SEC and DED interrupts.
 */
149static void gp10b_ltc_init_fs_state(struct gk20a *g) 163static void gp10b_ltc_init_fs_state(struct gk20a *g)
150{ 164{
165 u32 ltc_intr;
166
151 gm20b_ltc_init_fs_state(g); 167 gm20b_ltc_init_fs_state(g);
152 168
153 gk20a_writel(g, ltc_ltca_g_axi_pctrl_r(), 169 gk20a_writel(g, ltc_ltca_g_axi_pctrl_r(),
154 ltc_ltca_g_axi_pctrl_user_sid_f(TEGRA_SID_GPUB)); 170 ltc_ltca_g_axi_pctrl_user_sid_f(TEGRA_SID_GPUB));
171
/*
 * Read-modify-write so bits already set in the interrupt register are kept.
 * NOTE(review): the value read may also contain pending bits, which are
 * written back unchanged - confirm this is harmless.
 */
172 /* Enable ECC interrupts */
173 ltc_intr = gk20a_readl(g, ltc_ltcs_ltss_intr_r());
174 ltc_intr |= ltc_ltcs_ltss_intr_en_ecc_sec_error_enabled_f() |
175 ltc_ltcs_ltss_intr_en_ecc_ded_error_enabled_f();
176 gk20a_writel(g, ltc_ltcs_ltss_intr_r(),
177 ltc_intr);
155} 178}
156 179
157void gp10b_init_ltc(struct gpu_ops *gops) 180void gp10b_init_ltc(struct gpu_ops *gops)