summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
diff options
context:
space:
mode:
authorSeema Khowala <seemaj@nvidia.com>2017-03-09 01:34:49 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2017-04-12 18:33:50 -0400
commit457f176785af5c8821889d00d89db05bbaf8f772 (patch)
treed4b7913ffc728c1bda19a70746d758df3ff2f7a0 /drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
parentfbce374aa0f6101d27ca5b3de97905d2798c6f04 (diff)
gpu: nvgpu: gv11b: init handle sched_error & ctxsw_timeout ops
- detect and decode sched_error type. Any sched error starting with xxx_* is not supported in h/w and should never be seen by s/w - for bad_tsg sched error, preempt all runlists to recover as faulted ch/tsg is unknown. For other errors, just report error. - ctxsw timeout is not part of sched error fifo interrupt. A new fifo interrupt, ctxsw timeout is added in gv11b. Add s/w handling. Bug 1856152 JIRA GPUT19X-74 Change-Id: I474e1a3cda29a450691fe2ea1dc1e239ce57df1a Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: http://git-master/r/1317615 Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/fifo_gv11b.c')
-rw-r--r--drivers/gpu/nvgpu/gv11b/fifo_gv11b.c237
1 files changed, 234 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 3c1982fe..6883d867 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -15,13 +15,15 @@
15#include <linux/delay.h> 15#include <linux/delay.h>
16#include <linux/types.h> 16#include <linux/types.h>
17 17
18#include "nvgpu/semaphore.h" 18#include <nvgpu/semaphore.h>
19#include <nvgpu/timers.h> 19#include <nvgpu/timers.h>
20#include <nvgpu/log.h>
20 21
21 22
22#include "gk20a/gk20a.h" 23#include "gk20a/gk20a.h"
23#include "gk20a/fifo_gk20a.h" 24#include "gk20a/fifo_gk20a.h"
24#include "gk20a/ctxsw_trace_gk20a.h" 25#include "gk20a/ctxsw_trace_gk20a.h"
26#include "gk20a/channel_gk20a.h"
25 27
26#include "gp10b/fifo_gp10b.h" 28#include "gp10b/fifo_gp10b.h"
27 29
@@ -862,7 +864,7 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
862 gk20a_dbg_info("hw id =%d", id); 864 gk20a_dbg_info("hw id =%d", id);
863 gk20a_dbg_info("id_type =%d", id_type); 865 gk20a_dbg_info("id_type =%d", id_type);
864 gk20a_dbg_info("rc_type =%d", rc_type); 866 gk20a_dbg_info("rc_type =%d", rc_type);
865 gk20a_dbg_info("mmu_fault =%p", mmfault); 867 gk20a_dbg_info("mmu_fault =0x%p", mmfault);
866 868
867 runlists_mask = gv11b_fifo_get_runlists_mask(g, act_eng_bitmask, id, 869 runlists_mask = gv11b_fifo_get_runlists_mask(g, act_eng_bitmask, id,
868 id_type, rc_type, mmfault); 870 id_type, rc_type, mmfault);
@@ -1060,7 +1062,8 @@ static u32 gv11b_fifo_intr_0_en_mask(struct gk20a *g)
1060 intr_0_en_mask = g->ops.fifo.intr_0_error_mask(g); 1062 intr_0_en_mask = g->ops.fifo.intr_0_error_mask(g);
1061 1063
1062 intr_0_en_mask |= fifo_intr_0_runlist_event_pending_f() | 1064 intr_0_en_mask |= fifo_intr_0_runlist_event_pending_f() |
1063 fifo_intr_0_pbdma_intr_pending_f(); 1065 fifo_intr_0_pbdma_intr_pending_f() |
1066 fifo_intr_0_ctxsw_timeout_pending_f();
1064 1067
1065 return intr_0_en_mask; 1068 return intr_0_en_mask;
1066} 1069}
@@ -1072,6 +1075,7 @@ int gv11b_init_fifo_reset_enable_hw(struct gk20a *g)
1072 u32 timeout; 1075 u32 timeout;
1073 unsigned int i; 1076 unsigned int i;
1074 u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA); 1077 u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
1078 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
1075 1079
1076 gk20a_dbg_fn(""); 1080 gk20a_dbg_fn("");
1077 1081
@@ -1123,6 +1127,16 @@ int gv11b_init_fifo_reset_enable_hw(struct gk20a *g)
1123 gk20a_writel(g, pbdma_intr_en_1_r(i), intr_stall); 1127 gk20a_writel(g, pbdma_intr_en_1_r(i), intr_stall);
1124 } 1128 }
1125 1129
1130 /* clear ctxsw timeout interrupts */
1131 gk20a_writel(g, fifo_intr_ctxsw_timeout_r(), ~0);
1132
1133 /* enable ctxsw timeout */
1134 timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
1135 timeout = scale_ptimer(timeout,
1136 ptimer_scalingfactor10x(platform->ptimer_src_freq));
1137 timeout |= fifo_eng_ctxsw_timeout_detection_enabled_f();
1138 gk20a_writel(g, fifo_eng_ctxsw_timeout_r(), timeout);
1139
1126 /* clear runlist interrupts */ 1140 /* clear runlist interrupts */
1127 gk20a_writel(g, fifo_intr_runlist_r(), ~0); 1141 gk20a_writel(g, fifo_intr_runlist_r(), ~0);
1128 1142
@@ -1139,6 +1153,221 @@ int gv11b_init_fifo_reset_enable_hw(struct gk20a *g)
1139 return 0; 1153 return 0;
1140} 1154}
1141 1155
/*
 * Human-readable decode table for fifo_intr_sched_error_r() codes,
 * indexed by the raw error value.  Entries named "xxx-*" are codes
 * not produced by h/w and should never be seen by s/w.  The last
 * entry, "bad_tsg" (index 0x20), is the only code that triggers
 * recovery in gv11b_fifo_handle_sched_error() — presumably matching
 * SCHED_ERROR_CODE_BAD_TSG, which is defined elsewhere (confirm).
 */
static const char *const gv11b_sched_error_str[] = {
	"xxx-0",
	"xxx-1",
	"xxx-2",
	"xxx-3",
	"xxx-4",
	"engine_reset",
	"rl_ack_timeout",
	"rl_ack_extra",
	"rl_rdat_timeout",
	"rl_rdat_extra",
	"xxx-a",
	"xxx-b",
	"rl_req_timeout",
	"new_runlist",
	"code_config_while_busy",
	"xxx-f",
	"xxx-0x10",
	"xxx-0x11",
	"xxx-0x12",
	"xxx-0x13",
	"xxx-0x14",
	"xxx-0x15",
	"xxx-0x16",
	"xxx-0x17",
	"xxx-0x18",
	"xxx-0x19",
	"xxx-0x1a",
	"xxx-0x1b",
	"xxx-0x1c",
	"xxx-0x1d",
	"xxx-0x1e",
	"xxx-0x1f",
	"bad_tsg",
};
1191
1192static bool gv11b_fifo_handle_sched_error(struct gk20a *g)
1193{
1194 u32 sched_error;
1195
1196 sched_error = gk20a_readl(g, fifo_intr_sched_error_r());
1197
1198 if (sched_error < ARRAY_SIZE(gv11b_sched_error_str))
1199 nvgpu_err(g, "fifo sched error :%s",
1200 gv11b_sched_error_str[sched_error]);
1201 else
1202 nvgpu_err(g, "fifo sched error code not supported");
1203
1204 if (sched_error == SCHED_ERROR_CODE_BAD_TSG ) {
1205 /* id is unknown, preempt all runlists and do recovery */
1206 gk20a_fifo_recover(g, 0, 0, false, false, false);
1207 }
1208
1209 return false;
1210}
1211
1212static u32 gv11b_fifo_ctxsw_timeout_info(struct gk20a *g, u32 active_eng_id)
1213{
1214 u32 tsgid = FIFO_INVAL_TSG_ID;
1215 u32 timeout_info;
1216 u32 ctx_status, info_status;
1217
1218 timeout_info = gk20a_readl(g,
1219 fifo_intr_ctxsw_timeout_info_r(active_eng_id));
1220
1221 /*
1222 * ctxsw_state and tsgid are snapped at the point of the timeout and
1223 * will not change while the corresponding INTR_CTXSW_TIMEOUT_ENGINE bit
1224 * is PENDING.
1225 */
1226 ctx_status = fifo_intr_ctxsw_timeout_info_ctxsw_state_v(timeout_info);
1227 if (ctx_status ==
1228 fifo_intr_ctxsw_timeout_info_ctxsw_state_load_v()) {
1229
1230 tsgid = fifo_intr_ctxsw_timeout_info_next_tsgid_v(timeout_info);
1231
1232 } else if (ctx_status ==
1233 fifo_intr_ctxsw_timeout_info_ctxsw_state_switch_v() ||
1234 ctx_status ==
1235 fifo_intr_ctxsw_timeout_info_ctxsw_state_save_v()) {
1236
1237 tsgid = fifo_intr_ctxsw_timeout_info_prev_tsgid_v(timeout_info);
1238 }
1239 gk20a_dbg_info("ctxsw timeout info: tsgid = %d", tsgid);
1240
1241 /*
1242 * STATUS indicates whether the context request ack was eventually
1243 * received and whether a subsequent request timed out. This field is
1244 * updated live while the corresponding INTR_CTXSW_TIMEOUT_ENGINE bit
1245 * is PENDING. STATUS starts in AWAITING_ACK, and progresses to
1246 * ACK_RECEIVED and finally ends with DROPPED_TIMEOUT.
1247 *
1248 * AWAITING_ACK - context request ack still not returned from engine.
1249 * ENG_WAS_RESET - The engine was reset via a PRI write to NV_PMC_ENABLE
1250 * or NV_PMC_ELPG_ENABLE prior to receiving the ack. Host will not
1251 * expect ctx ack to return, but if it is already in flight, STATUS will
1252 * transition shortly to ACK_RECEIVED unless the interrupt is cleared
1253 * first. Once the engine is reset, additional context switches can
1254 * occur; if one times out, STATUS will transition to DROPPED_TIMEOUT
1255 * if the interrupt isn't cleared first.
1256 * ACK_RECEIVED - The ack for the timed-out context request was
1257 * received between the point of the timeout and this register being
1258 * read. Note this STATUS can be reported during the load stage of the
1259 * same context switch that timed out if the timeout occurred during the
1260 * save half of a context switch. Additional context requests may have
1261 * completed or may be outstanding, but no further context timeout has
1262 * occurred. This simplifies checking for spurious context switch
1263 * timeouts.
1264 * DROPPED_TIMEOUT - The originally timed-out context request acked,
1265 * but a subsequent context request then timed out.
1266 * Information about the subsequent timeout is not stored; in fact, that
1267 * context request may also have already been acked by the time SW
1268 * SW reads this register. If not, there is a chance SW can get the
1269 * dropped information by clearing the corresponding
1270 * INTR_CTXSW_TIMEOUT_ENGINE bit and waiting for the timeout to occur
1271 * again. Note, however, that if the engine does time out again,
1272 * it may not be from the original request that caused the
1273 * DROPPED_TIMEOUT state, as that request may
1274 * be acked in the interim.
1275 */
1276 info_status = fifo_intr_ctxsw_timeout_info_status_v(timeout_info);
1277 if (info_status ==
1278 fifo_intr_ctxsw_timeout_info_status_awaiting_ack_v()) {
1279
1280 gk20a_dbg_info("ctxsw timeout info : awaiting ack");
1281
1282 } else if (info_status ==
1283 fifo_intr_ctxsw_timeout_info_status_eng_was_reset_v()) {
1284
1285 gk20a_dbg_info("ctxsw timeout info : eng was reset");
1286
1287 } else if (info_status ==
1288 fifo_intr_ctxsw_timeout_info_status_ack_received_v()) {
1289
1290 gk20a_dbg_info("ctxsw timeout info : ack received");
1291 /* no need to recover */
1292 tsgid = FIFO_INVAL_TSG_ID;
1293
1294 } else if (info_status ==
1295 fifo_intr_ctxsw_timeout_info_status_dropped_timeout_v()) {
1296
1297 gk20a_dbg_info("ctxsw timeout info : dropped timeout");
1298 /* no need to recover */
1299 tsgid = FIFO_INVAL_TSG_ID;
1300
1301 } else {
1302 gk20a_dbg_info("ctxsw timeout info status = %u", info_status);
1303 }
1304
1305 return tsgid;
1306}
1307
1308static bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g, u32 fifo_intr)
1309{
1310 bool ret = false;
1311 u32 tsgid = FIFO_INVAL_TSG_ID;
1312 u32 engine_id, active_eng_id;
1313 u32 timeout_val, ctxsw_timeout_engines;
1314
1315
1316 if (!(fifo_intr & fifo_intr_0_ctxsw_timeout_pending_f()))
1317 return ret;
1318
1319 /* get ctxsw timedout engines */
1320 ctxsw_timeout_engines = gk20a_readl(g, fifo_intr_ctxsw_timeout_r());
1321 if (ctxsw_timeout_engines == 0) {
1322 nvgpu_err(g, "no eng ctxsw timeout pending");
1323 return ret;
1324 }
1325
1326 timeout_val = gk20a_readl(g, fifo_eng_ctxsw_timeout_r());
1327 timeout_val = fifo_eng_ctxsw_timeout_period_v(timeout_val);
1328
1329 gk20a_dbg_info("eng ctxsw timeout period = 0x%x", timeout_val);
1330
1331 for (engine_id = 0; engine_id < g->fifo.num_engines; engine_id++) {
1332 active_eng_id = g->fifo.active_engines_list[engine_id];
1333
1334 if (ctxsw_timeout_engines &
1335 fifo_intr_ctxsw_timeout_engine_pending_f(
1336 active_eng_id)) {
1337
1338 struct fifo_gk20a *f = &g->fifo;
1339 u32 ms = 0;
1340 bool verbose = false;
1341
1342 tsgid = gv11b_fifo_ctxsw_timeout_info(g, active_eng_id);
1343
1344 if (tsgid == FIFO_INVAL_TSG_ID)
1345 continue;
1346
1347 if (gk20a_fifo_check_tsg_ctxsw_timeout(
1348 &f->tsg[tsgid], &verbose, &ms)) {
1349 ret = true;
1350 nvgpu_err(g,
1351 "ctxsw timeout error:"
1352 "active engine id =%u, %s=%d, ms=%u",
1353 active_eng_id, "tsg", tsgid, ms);
1354
1355 /* Cancel all channels' timeout */
1356 gk20a_channel_timeout_restart_all_channels(g);
1357 gk20a_fifo_recover(g, BIT(active_eng_id), tsgid,
1358 true, true, verbose);
1359 } else {
1360 gk20a_dbg_info(
1361 "fifo is waiting for ctx switch: "
1362 "for %d ms, %s=%d", ms, "tsg", tsgid);
1363 }
1364 }
1365 }
1366 /* clear interrupt */
1367 gk20a_writel(g, fifo_intr_ctxsw_timeout_r(), ctxsw_timeout_engines);
1368 return ret;
1369}
1370
1142void gv11b_init_fifo(struct gpu_ops *gops) 1371void gv11b_init_fifo(struct gpu_ops *gops)
1143{ 1372{
1144 gp10b_init_fifo(gops); 1373 gp10b_init_fifo(gops);
@@ -1169,4 +1398,6 @@ void gv11b_init_fifo(struct gpu_ops *gops)
1169 gops->fifo.init_pbdma_intr_descs = gv11b_fifo_init_pbdma_intr_descs; 1398 gops->fifo.init_pbdma_intr_descs = gv11b_fifo_init_pbdma_intr_descs;
1170 gops->fifo.reset_enable_hw = gv11b_init_fifo_reset_enable_hw; 1399 gops->fifo.reset_enable_hw = gv11b_init_fifo_reset_enable_hw;
1171 gops->fifo.teardown_ch_tsg = gv11b_fifo_teardown_ch_tsg; 1400 gops->fifo.teardown_ch_tsg = gv11b_fifo_teardown_ch_tsg;
1401 gops->fifo.handle_sched_error = gv11b_fifo_handle_sched_error;
1402 gops->fifo.handle_ctxsw_timeout = gv11b_fifo_handle_ctxsw_timeout;
1172} 1403}