author    | Deepak Nibade <dnibade@nvidia.com> | 2015-12-18 02:05:04 -0500
committer | Deepak Nibade <dnibade@nvidia.com> | 2016-12-27 04:52:10 -0500
commit    | de47308b2c2ef2d24951a7e1c4ece9964417c167 (patch)
tree      | e35cf4a956fb2580cd63f50cdf9d422b2d0763df /drivers/gpu/nvgpu/gp10b/gr_gp10b.c
parent    | 095bd5e59d896ebab12af25ac05aa4071257ecb1 (diff)
gpu: nvgpu: add CILP support for gp10b
Add CILP support for gp10b by defining the function pointers below
(each step is explained here; a condensed sketch of the flow follows
the list).

pre_process_sm_exception()
- for CILP-enabled channels, get the mask of errors
- if the stop_trigger needs to be broadcast, suspend all SMs
- otherwise suspend only the current SM
- clear the hww_global_esr values in h/w
- gr_gp10b_set_cilp_preempt_pending()
  - get the ctx_id
  - using the sideband method, program FECS to generate an
    interrupt on the next ctxsw
  - disable and preempt the channel/TSG
  - set cilp_preempt_pending = true
- clear single-step mode
- resume the current SM

handle_fecs_error()
- ctxsw_intr1 is raised on the next ctxsw
- clear this interrupt
- get the handle of the channel on which the SM exception was
  first triggered
- gr_gp10b_clear_cilp_preempt_pending()
  - set cilp_preempt_pending = false
- send events to the channel and the debug session fd
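The sketch below is an editor's condensation of the flow just described, using
the helper names from the patch further down. It is illustrative only: it omits
error handling, register programming, and locking, and it compiles only inside
the driver tree, so treat it as a reading aid rather than the actual code.

```c
/*
 * Illustrative sketch only (not part of the patch): the CILP flow
 * described above. The real implementation is in the diff below.
 */

/* SM exception side: what pre_process_sm_exception() does for a
 * CILP-enabled channel with the SM debugger attached. */
static void cilp_sm_exception_flow_sketch(struct gk20a *g, u32 gpc, u32 tpc,
					  u32 global_esr,
					  struct channel_gk20a *fault_ch)
{
	u32 global_mask = 0;	/* mask of the SM errors of interest */

	/* broadcast the stop_trigger if needed, else stop only this SM */
	if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch))
		gk20a_suspend_all_sms(g, global_mask, false);
	else
		gk20a_suspend_single_sm(g, gpc, tpc, global_mask, true);

	/* clear the HWW errors in h/w once the SM(s) are locked down */
	gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);

	/* arm the CILP preemption: look up ctx_id, program FECS via the
	 * sideband method to raise an interrupt on the next ctxsw,
	 * disable and preempt the channel/TSG, and set
	 * cilp_preempt_pending = true */
	gr_gp10b_set_cilp_preempt_pending(g, fault_ch);

	/* clear single-step mode, then resume the faulting SM */
	gk20a_resume_single_sm(g, gpc, tpc);
}

/* FECS side: what handle_fecs_error() does when ctxsw_intr1 fires on
 * the next ctxsw, after clearing the interrupt and finding the channel
 * that first triggered the SM exception. */
static void cilp_fecs_intr_flow_sketch(struct gk20a *g,
				       struct channel_gk20a *ch)
{
	gr_gp10b_clear_cilp_preempt_pending(g, ch);	/* pending = false */
	gk20a_dbg_gpu_post_events(ch);			/* debug session fd */
	gk20a_channel_post_event(ch);			/* channel event */
}
```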
Bug 200156699
Change-Id: Ia765db47e68fb968fada6409609af505c079df53
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/925897
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/gr_gp10b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 314
1 file changed, 314 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index a13b9a2c..91adf20c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -19,6 +19,7 @@
 
 #include "gk20a/gr_gk20a.h"
 #include "gk20a/semaphore_gk20a.h"
+#include "gk20a/dbg_gpu_gk20a.h"
 
 #include "gm20b/gr_gm20b.h" /* for MAXWELL classes */
 #include "gp10b/gr_gp10b.h"
@@ -657,6 +658,8 @@ static int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
 	if (err)
 		return err;
 
+	(*gr_ctx)->t18x.ctx_id_valid = false;
+
 	if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
 		flags |= NVGPU_ALLOC_OBJ_FLAGS_GFXP;
 
@@ -1224,6 +1227,314 @@ static void gr_gp10b_get_access_map(struct gk20a *g,
 	*num_entries = ARRAY_SIZE(wl_addr_gp10b);
 }
 
+static int gr_gp10b_disable_channel_or_tsg(struct gk20a *g, struct channel_gk20a *fault_ch)
+{
+	int ret = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	ret = gk20a_disable_channel_tsg(g, fault_ch);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+			"CILP: failed to disable channel/TSG!\n");
+		return ret;
+	}
+
+	ret = g->ops.fifo.update_runlist(g, 0, ~0, true, false);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+			"CILP: failed to restart runlist 0!");
+		return ret;
+	}
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: restarted runlist");
+
+	if (gk20a_is_channel_marked_as_tsg(fault_ch))
+		gk20a_fifo_issue_preempt(g, fault_ch->tsgid, true);
+	else
+		gk20a_fifo_issue_preempt(g, fault_ch->hw_chid, false);
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "CILP: preempted the channel/tsg");
+
+	return ret;
+}
+
+static int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, struct channel_gk20a *fault_ch)
+{
+	int ret;
+	struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	if (!gr_ctx)
+		return -EINVAL;
+
+	if (gr_ctx->t18x.cilp_preempt_pending) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP is already pending for chid %d",
+				fault_ch->hw_chid);
+		return 0;
+	}
+
+	/* get ctx_id from the ucode image */
+	if (!gr_ctx->t18x.ctx_id_valid) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP: looking up ctx id");
+		ret = gr_gk20a_get_ctx_id(g, fault_ch, &gr_ctx->t18x.ctx_id);
+		if (ret) {
+			gk20a_err(dev_from_gk20a(g), "CILP: error looking up ctx id!\n");
+			return ret;
+		}
+		gr_ctx->t18x.ctx_id_valid = true;
+	}
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+			"CILP: ctx id is 0x%x", gr_ctx->t18x.ctx_id);
+
+	/* send ucode method to set ctxsw interrupt */
+	ret = gr_gk20a_submit_fecs_sideband_method_op(g,
+			(struct fecs_method_op_gk20a) {
+			.method.data = gr_ctx->t18x.ctx_id,
+			.method.addr =
+			gr_fecs_method_push_adr_configure_interrupt_completion_option_v(),
+			.mailbox = {
+			.id = 1 /* sideband */, .data = 0,
+			.clr = ~0, .ret = NULL,
+			.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
+			.fail = 0},
+			.cond.ok = GR_IS_UCODE_OP_EQUAL,
+			.cond.fail = GR_IS_UCODE_OP_SKIP});
+
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+				"CILP: failed to enable ctxsw interrupt!");
+		return ret;
+	}
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+			"CILP: enabled ctxsw completion interrupt");
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+			"CILP: disabling channel %d",
+			fault_ch->hw_chid);
+
+	ret = gr_gp10b_disable_channel_or_tsg(g, fault_ch);
+	if (ret) {
+		gk20a_err(dev_from_gk20a(g),
+				"CILP: failed to disable channel!!");
+		return ret;
+	}
+
+	/* set cilp_preempt_pending = true and record the channel */
+	gr_ctx->t18x.cilp_preempt_pending = true;
+	g->gr.t18x.cilp_preempt_pending_chid = fault_ch->hw_chid;
+
+	return 0;
+}
+
+static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
+		struct channel_gk20a *fault_ch)
+{
+	struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	if (!gr_ctx)
+		return -EINVAL;
+
+	/* The ucode is self-clearing, so all we need to do here is
+	   to clear cilp_preempt_pending. */
+	if (!gr_ctx->t18x.cilp_preempt_pending) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP is already cleared for chid %d\n",
+				fault_ch->hw_chid);
+		return 0;
+	}
+
+	gr_ctx->t18x.cilp_preempt_pending = false;
+	g->gr.t18x.cilp_preempt_pending_chid = -1;
+
+	return 0;
+}
+
+/* @brief pre-process work on the SM exceptions to determine if we clear them or not.
+ *
+ * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing
+ */
+int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
+		u32 gpc, u32 tpc, u32 global_esr, u32 warp_esr,
+		bool sm_debugger_attached, struct channel_gk20a *fault_ch,
+		bool *early_exit, bool *ignore_debugger)
+{
+	int ret;
+	bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->preempt_mode ==
+			NVGPU_GR_PREEMPTION_MODE_CILP) ;
+	u32 global_mask = 0, dbgr_control0, global_esr_copy;
+	u32 offset = proj_gpc_stride_v() * gpc +
+			proj_tpc_in_gpc_stride_v() * tpc;
+
+	*early_exit = false;
+	*ignore_debugger = false;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n",
+			gpc, tpc, global_esr);
+
+	if (cilp_enabled && sm_debugger_attached) {
+		if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f())
+			gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+					gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f());
+
+		if (global_esr & gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f())
+			gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+					gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f());
+
+		global_mask = gr_gpc0_tpc0_sm_hww_global_esr_sm_to_sm_fault_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_l1_error_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_multiple_warp_errors_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_physical_stack_overflow_error_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_timeout_error_pending_f() |
+			gr_gpcs_tpcs_sm_hww_global_esr_bpt_pause_pending_f();
+
+		if (warp_esr != 0 || (global_esr & global_mask) != 0) {
+			*ignore_debugger = true;
+
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+					"CILP: starting wait for LOCKED_DOWN on gpc %d tpc %d\n",
+					gpc, tpc);
+
+			if (gk20a_dbg_gpu_broadcast_stop_trigger(fault_ch)) {
+				gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+						"CILP: Broadcasting STOP_TRIGGER from gpc %d tpc %d\n",
+						gpc, tpc);
+				gk20a_suspend_all_sms(g, global_mask, false);
+
+				gk20a_dbg_gpu_clear_broadcast_stop_trigger(fault_ch);
+			} else {
+				gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+						"CILP: STOP_TRIGGER from gpc %d tpc %d\n",
+						gpc, tpc);
+				gk20a_suspend_single_sm(g, gpc, tpc, global_mask, true);
+			}
+
+			/* reset the HWW errors after locking down */
+			global_esr_copy = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr_copy);
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+					"CILP: HWWs cleared for gpc %d tpc %d\n",
+					gpc, tpc);
+
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: Setting CILP preempt pending\n");
+			ret = gr_gp10b_set_cilp_preempt_pending(g, fault_ch);
+			if (ret) {
+				gk20a_err(dev_from_gk20a(g), "CILP: error while setting CILP preempt pending!\n");
+				return ret;
+			}
+
+			dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset);
+			if (dbgr_control0 & gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_enable_f()) {
+				gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+						"CILP: clearing SINGLE_STEP_MODE before resume for gpc %d tpc %d\n",
+						gpc, tpc);
+				dbgr_control0 = set_field(dbgr_control0,
+						gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_m(),
+						gr_gpcs_tpcs_sm_dbgr_control0_single_step_mode_disable_f());
+				gk20a_writel(g, gr_gpc0_tpc0_sm_dbgr_control0_r() + offset, dbgr_control0);
+			}
+
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
+					"CILP: resume for gpc %d tpc %d\n",
+					gpc, tpc);
+			gk20a_resume_single_sm(g, gpc, tpc);
+
+			*ignore_debugger = true;
+			gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "CILP: All done on gpc %d, tpc %d\n", gpc, tpc);
+		}
+
+		*early_exit = true;
+	}
+	return 0;
+}
+
+static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
+{
+	struct gr_ctx_desc *gr_ctx;
+	struct channel_gk20a *ch;
+	int chid;
+	int ret = -EINVAL;
+
+	chid = g->gr.t18x.cilp_preempt_pending_chid;
+
+	ch = gk20a_channel_get(gk20a_fifo_channel_from_hw_chid(g, chid));
+	if (!ch)
+		return ret;
+
+	gr_ctx = ch->ch_ctx.gr_ctx;
+
+	if (gr_ctx->t18x.cilp_preempt_pending) {
+		*__chid = chid;
+		ret = 0;
+	}
+
+	gk20a_channel_put(ch);
+
+	return ret;
+}
+
+static int gr_gp10b_handle_fecs_error(struct gk20a *g,
+		struct channel_gk20a *__ch,
+		struct gr_gk20a_isr_data *isr_data)
+{
+	u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
+	struct channel_gk20a *ch;
+	int chid = -1;
+	int ret = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
+
+	/*
+	 * INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR)
+	 * indicates that a CILP ctxsw save has finished
+	 */
+	if (gr_fecs_intr & gr_fecs_host_int_status_ctxsw_intr_f(2)) {
+		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
+				"CILP: ctxsw save completed!\n");
+
+		/* now clear the interrupt */
+		gk20a_writel(g, gr_fecs_host_int_clear_r(),
+				gr_fecs_host_int_clear_ctxsw_intr1_clear_f());
+
+		ret = gr_gp10b_get_cilp_preempt_pending_chid(g, &chid);
+		if (ret)
+			goto clean_up;
+
+		ch = gk20a_channel_get(
+				gk20a_fifo_channel_from_hw_chid(g, chid));
+		if (!ch)
+			goto clean_up;
+
+
+		/* set preempt_pending to false */
+		ret = gr_gp10b_clear_cilp_preempt_pending(g, ch);
+		if (ret) {
+			gk20a_err(dev_from_gk20a(g), "CILP: error while unsetting CILP preempt pending!\n");
+			gk20a_channel_put(ch);
+			goto clean_up;
+		}
+
+		if (gk20a_gr_sm_debugger_attached(g)) {
+			gk20a_err(dev_from_gk20a(g), "CILP: posting usermode event");
+			gk20a_dbg_gpu_post_events(ch);
+			gk20a_channel_post_event(ch);
+		}
+
+		gk20a_channel_put(ch);
+	}
+
+clean_up:
+	/* handle any remaining interrupts */
+	return gk20a_gr_handle_fecs_error(g, __ch, isr_data);
+}
+
 static u32 gp10b_mask_hww_warp_esr(u32 hww_warp_esr)
 {
 	if (!(hww_warp_esr & gr_gpc0_tpc0_sm_hww_warp_esr_addr_valid_m()))
@@ -1267,4 +1578,7 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.handle_sm_exception = gr_gp10b_handle_sm_exception;
 	gops->gr.handle_tex_exception = gr_gp10b_handle_tex_exception;
 	gops->gr.mask_hww_warp_esr = gp10b_mask_hww_warp_esr;
+	gops->gr.pre_process_sm_exception =
+		gr_gp10b_pre_process_sm_exception;
+	gops->gr.handle_fecs_error = gr_gp10b_handle_fecs_error;
 }
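The hooks installed above are consumed by the common gk20a ISR paths rather than
by code in this file, so the call site is not part of this diff. As a rough,
hypothetical illustration of how the generic SM exception handler is expected to
use the new pointer (only the hook's signature is taken from the patch; the
surrounding variable names and control flow are assumptions), the pattern would
look like:

```c
/* Hypothetical caller sketch -- not part of the patch above. The common
 * gk20a SM exception handler consults the chip-specific hook before its
 * default handling. */
bool early_exit = false, ignore_debugger = false;
int ret = 0;

if (g->ops.gr.pre_process_sm_exception) {
	ret = g->ops.gr.pre_process_sm_exception(g, gpc, tpc,
			global_esr, warp_esr,
			sm_debugger_attached, fault_ch,
			&early_exit, &ignore_debugger);
	/* on the CILP path early_exit is set and the faulting SM has
	 * already been resumed, so the generic handling is skipped */
	if (early_exit)
		return ret;
}
```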